docparser 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +5 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +9 -1
  6. data/README.md +11 -4
  7. data/Rakefile +15 -0
  8. data/example.rb +9 -7
  9. data/lib/docparser.rb +1 -0
  10. data/lib/docparser/document.rb +18 -11
  11. data/lib/docparser/output.rb +8 -8
  12. data/lib/docparser/output/html_output.rb +53 -47
  13. data/lib/docparser/output/json_output.rb +8 -3
  14. data/lib/docparser/output/multi_output.rb +4 -8
  15. data/lib/docparser/output/nil_output.rb +21 -0
  16. data/lib/docparser/output/screen_output.rb +2 -1
  17. data/lib/docparser/output/xlsx_output.rb +12 -2
  18. data/lib/docparser/output/yaml_output.rb +6 -1
  19. data/lib/docparser/parser.rb +80 -49
  20. data/lib/docparser/version.rb +1 -1
  21. data/test/lib/docparser/blackbox_test.rb +29 -0
  22. data/test/lib/docparser/document_test.rb +134 -0
  23. data/test/lib/docparser/logging_test.rb +19 -0
  24. data/test/lib/docparser/output/csv_output_test.rb +51 -0
  25. data/test/lib/docparser/output/html_output_test.rb +57 -0
  26. data/test/lib/docparser/output/json_output_test.rb +65 -0
  27. data/test/lib/docparser/output/multi_output_test.rb +80 -0
  28. data/test/lib/docparser/output/nil_output_test.rb +27 -0
  29. data/test/lib/docparser/output/screen_output_test.rb +55 -0
  30. data/test/lib/docparser/output/xlsx_output_test.rb +53 -0
  31. data/test/lib/docparser/output/yaml_output_test.rb +76 -0
  32. data/test/lib/docparser/output_test.rb +85 -0
  33. data/test/lib/docparser/parser_test.rb +197 -0
  34. data/test/lib/docparser/version_test.rb +11 -0
  35. data/test/support/hackaday/dl.rb +4 -0
  36. data/test/support/hackaday/file_1.html +716 -0
  37. data/test/support/hackaday/file_10.html +791 -0
  38. data/test/support/hackaday/file_11.html +787 -0
  39. data/test/support/hackaday/file_12.html +715 -0
  40. data/test/support/hackaday/file_13.html +793 -0
  41. data/test/support/hackaday/file_14.html +718 -0
  42. data/test/support/hackaday/file_15.html +707 -0
  43. data/test/support/hackaday/file_16.html +713 -0
  44. data/test/support/hackaday/file_17.html +715 -0
  45. data/test/support/hackaday/file_18.html +725 -0
  46. data/test/support/hackaday/file_19.html +715 -0
  47. data/test/support/hackaday/file_2.html +793 -0
  48. data/test/support/hackaday/file_20.html +795 -0
  49. data/test/support/hackaday/file_21.html +804 -0
  50. data/test/support/hackaday/file_22.html +722 -0
  51. data/test/support/hackaday/file_23.html +793 -0
  52. data/test/support/hackaday/file_24.html +717 -0
  53. data/test/support/hackaday/file_25.html +715 -0
  54. data/test/support/hackaday/file_26.html +717 -0
  55. data/test/support/hackaday/file_27.html +723 -0
  56. data/test/support/hackaday/file_28.html +711 -0
  57. data/test/support/hackaday/file_29.html +711 -0
  58. data/test/support/hackaday/file_3.html +794 -0
  59. data/test/support/hackaday/file_30.html +715 -0
  60. data/test/support/hackaday/file_31.html +713 -0
  61. data/test/support/hackaday/file_32.html +714 -0
  62. data/test/support/hackaday/file_33.html +716 -0
  63. data/test/support/hackaday/file_34.html +714 -0
  64. data/test/support/hackaday/file_35.html +792 -0
  65. data/test/support/hackaday/file_36.html +719 -0
  66. data/test/support/hackaday/file_37.html +712 -0
  67. data/test/support/hackaday/file_38.html +709 -0
  68. data/test/support/hackaday/file_39.html +808 -0
  69. data/test/support/hackaday/file_4.html +814 -0
  70. data/test/support/hackaday/file_40.html +801 -0
  71. data/test/support/hackaday/file_5.html +715 -0
  72. data/test/support/hackaday/file_6.html +792 -0
  73. data/test/support/hackaday/file_7.html +714 -0
  74. data/test/support/hackaday/file_8.html +717 -0
  75. data/test/support/hackaday/file_9.html +719 -0
  76. data/test/support/test_encoding.html +12 -0
  77. data/test/support/test_encoding2.html +12 -0
  78. data/test/support/test_html.html +16 -0
  79. data/test/support/test_xml.xml +5 -0
  80. data/test/test_helper.rb +14 -0
  81. metadata +126 -3
@@ -0,0 +1,51 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::CSVOutput: file creation, semicolon-separated
# header/row serialisation and row counting.
describe DocParser::CSVOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'must create a file' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.csv')
      DocParser::CSVOutput.new(filename: filename)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      File.exist?(filename).must_equal true
    end
  end

  it 'must save the header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.csv')
      output = DocParser::CSVOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.close
      # File.read opens, reads and closes in one call (no leaked handle).
      File.read(filename).must_equal "test;the;header\n"
    end
  end

  it 'must save some rows' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.csv')
      output = DocParser::CSVOutput.new(filename: filename)
      output.add_row(['aap', 'noot', 'mies'])
      # A value containing the separator is expected to come back quoted.
      output.add_row(['aap', 'noot', 'mies;'])
      output.close
      File.read(filename).must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
    end
  end

  it 'must give the correct rowcount' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.csv')
      output = DocParser::CSVOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      # Setting the header is expected not to count as a row.
      output.rowcount.must_equal 0
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies'])
      output.rowcount.must_equal 2
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::HTMLOutput: file creation, <thead>/<tbody> markup
# for header and rows, and the row-count summary written on close.
describe DocParser::HTMLOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'must create a file' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.html')
      DocParser::HTMLOutput.new(filename: filename)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      File.exist?(filename).must_equal true
    end
  end

  it 'must save the header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.html')
      output = DocParser::HTMLOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.close
      # File.read opens, reads and closes in one call (no leaked handle).
      File.read(filename).must_include '<thead><tr><th>test</th><th>the</th>'\
                                       '<th>header</th></tr></thead>'
    end
  end

  it 'must save some rows' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.html')
      output = DocParser::HTMLOutput.new(filename: filename)
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies;'])
      output.close
      html = File.read(filename)
      html.must_include 'tbody'
      html.must_include '<tr><td>aap</td><td>noot</td><td>mies</td></tr>'
      html.must_include '<tr><td>aap</td><td>noot</td><td>mies;</td></tr>'
    end
  end

  it 'must give the correct rowcount' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.html')
      output = DocParser::HTMLOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      # Setting the header is expected not to count as a row.
      output.rowcount.must_equal 0
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies'])
      output.rowcount.must_equal 2
      output.close
      # The closed document is expected to contain the row total.
      File.read(filename).must_include('<p>2 rows</p>')
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::JSONOutput: file creation, the mandatory header,
# row serialisation as an array of objects, and row counting.
describe DocParser::JSONOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'must create a file' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.json')
      DocParser::JSONOutput.new(filename: filename)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      File.exist?(filename).must_equal true
    end
  end

  it 'must save the header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.json')
      output = DocParser::JSONOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.close
      # With a header but no rows the file is expected to be an empty array.
      File.read(filename).must_equal '[]'
    end
  end

  it 'must have a header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.json')
      output = DocParser::JSONOutput.new(filename: filename)
      # Adding a row before any header was set must be rejected.
      -> do
        output.add_row(['aap', 'noot', 'mies'])
      end.must_raise(DocParser::MissingHeaderException)
    end
  end

  it 'must save some rows' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.json')
      output = DocParser::JSONOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.add_row(['a', 'b', 'c'])
      output.add_row(['aap', 'noot', 'mies"'])
      output.add_row(['aap', 'noot']) # testing empty column
      output.close
      # Single-quoted literals: \" below is a literal backslash + quote,
      # i.e. the JSON escape for the double quote in the second row.
      File.read(filename).must_equal '[{"test":"a","the":"b","header":"c"}'\
        ',{"test":"aap","the":"noot","header":"mies\""}'\
        ',{"test":"aap","the":"noot","header":""}]'
    end
  end

  it 'must give the correct rowcount' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.json')
      output = DocParser::JSONOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      # Setting the header is expected not to count as a row.
      output.rowcount.must_equal 0
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies'])
      output.rowcount.must_equal 2
    end
  end
end
@@ -0,0 +1,80 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::MultiOutput: one file per supported extension,
# header handling, row counting and delegation to the wrapped outputs.
describe DocParser::MultiOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'must create files' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test')
      DocParser::MultiOutput.new(filename: filename)

      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      # The bare base name must not exist; only the suffixed files do.
      File.exist?(filename).must_equal false
      ['.csv', '.html', '.yml', '.xlsx', '.json'].each do |ext|
        File.exist?(filename + ext).must_equal true
      end
    end
  end

  it 'must save the header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test')
      output = DocParser::MultiOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.close
      # File.read opens, reads and closes in one call (no leaked handle).
      File.read(filename + '.yml').must_equal ''
      File.read(filename + '.csv').must_equal "test;the;header\n"
    end
  end

  it 'must have a header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test')
      output = DocParser::MultiOutput.new(filename: filename)
      # Adding a row before any header was set must be rejected.
      -> do
        output.add_row(['aap', 'noot', 'mies'])
      end.must_raise(DocParser::MissingHeaderException)
    end
  end

  it 'must give the correct rowcount' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test')
      output = DocParser::MultiOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      # Setting the header is expected not to count as a row.
      output.rowcount.must_equal 0
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies'])
      output.rowcount.must_equal 2
    end
  end

  it 'must delegate methods' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test')
      output = DocParser::MultiOutput.new(filename: filename)
      methods = [:add_row, :header=, :close]

      # Swap every wrapped output for a mock around it.
      outputs = output.instance_variable_get(:@outputs)
      outputs.map! { |o| SimpleMock.new o }
      output.instance_variable_set(:@outputs, outputs)

      methods.each do |method_name|
        # Call each method with as many nil arguments as its arity and
        # expect every wrapped output to receive the same call.
        arity = output.method(method_name).arity
        outputs.each { |o| o.expect(method_name, nil, [nil] * arity) }
        output.send(method_name, *([nil] * arity))
      end

      # `each`, not `map`: only the assertions matter, not a result array.
      outputs.each { |o| o.verify.must_equal true }
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::NilOutput: it must not write files and its
# rowcount is expected to stay at zero.
describe DocParser::NilOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before { Log4r::Logger['docparser'].level = Log4r::ERROR }

  # Set the docparser log threshold back to INFO after each example.
  after { Log4r::Logger['docparser'].level = Log4r::INFO }

  it 'should not create a file' do
    Dir.mktmpdir do |dir|
      # NOTE(review): the output is never pointed at `dir`, so this glob
      # over a fresh temp directory looks trivially empty - confirm intent.
      pattern = File.join(dir, '*')
      DocParser::NilOutput.new
      Dir[pattern].must_be_empty
    end
  end

  it 'must give the correct rowcount' do
    sink = DocParser::NilOutput.new
    sink.header = 'test', 'the', 'header'
    sink.rowcount.must_equal 0
    2.times { sink.add_row ['aap', 'noot', 'mies'] }
    # Rows are added above, yet the count is expected to remain 0.
    sink.rowcount.must_equal 0
  end
end
@@ -0,0 +1,55 @@
1
+ require_relative '../../../test_helper'
2
+ require 'stringio'
3
# Specs for DocParser::ScreenOutput: no files written, row counting,
# the mandatory header, and paging of the collected data on close.
describe DocParser::ScreenOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'should not create a file' do
    Dir.mktmpdir do |dir|
      # NOTE(review): the output is never pointed at `dir`, so this glob
      # over a fresh temp directory looks trivially empty - confirm intent.
      filename = File.join(dir, '*')
      DocParser::ScreenOutput.new
      Dir[filename].must_be_empty
    end
  end

  it 'must give the correct rowcount' do
    output = DocParser::ScreenOutput.new
    output.header = 'test', 'the', 'header'
    # Setting the header is expected not to count as a row.
    output.rowcount.must_equal 0
    output.add_row(['aap', 'noot', 'mies'])
    output.add_row(['aap', 'noot', 'mies'])
    output.rowcount.must_equal 2
  end

  it 'must have a header' do
    output = DocParser::ScreenOutput.new
    # Adding a row before any header was set must be rejected.
    -> do
      output.add_row(['aap', 'noot', 'mies'])
    end.must_raise(DocParser::MissingHeaderException)
  end

  it 'must output the data after close' do
    # A local buffer instead of a global ($out), so nothing leaks between
    # examples. define_method is used because its block closes over
    # `sink`, which a plain `def` could not see.
    sink = StringIO.new
    output = Class.new(DocParser::ScreenOutput) do
      # Redirect paging into the in-memory buffer, forwarding the
      # positional arguments plus the block (when one is given).
      define_method(:page) do |*args, &p|
        args << p
        args.compact!
        page_to sink, args
      end
    end.new
    output.header = 'test', 'the', 'header'
    output.add_row(['aap1', '', 'mies'])
    output.add_row(['aap2', 'mies1'])
    output.close
    out = sink.string
    out.must_include 'header'
    out.must_include 'aap1'
    out.must_include 'mies1'
    out.must_include 'mies'
  end
end
@@ -0,0 +1,53 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::XLSXOutput: file creation, header and row storage
# (checked through the internal @sheet), and row counting.
describe DocParser::XLSXOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'must create a file' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.xlsx')
      DocParser::XLSXOutput.new(filename: filename)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      File.exist?(filename).must_equal true
    end
  end

  it 'must save the header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.xlsx')
      output = DocParser::XLSXOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.close
      # The header is expected to occupy exactly one sheet row.
      sheet = output.instance_variable_get(:@sheet)
      sheet.rows.length.must_equal(1)
    end
  end

  it 'must save some rows' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.xlsx')
      output = DocParser::XLSXOutput.new(filename: filename)
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies;'])
      output.close
      sheet = output.instance_variable_get(:@sheet)
      sheet.rows.length.must_equal(2)
    end
  end

  it 'must give the correct rowcount' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.xlsx')
      output = DocParser::XLSXOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      # Setting the header is expected not to count as a row.
      output.rowcount.must_equal 0
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies'])
      output.rowcount.must_equal 2
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require_relative '../../../test_helper'
2
+
3
# Specs for DocParser::YAMLOutput: file creation, the mandatory header,
# one YAML document per row, and row counting.
describe DocParser::YAMLOutput do
  # Raise the docparser log threshold to ERROR while an example runs.
  before do
    Log4r::Logger['docparser'].level = Log4r::ERROR
  end

  # Set the docparser log threshold back to INFO after each example.
  after do
    Log4r::Logger['docparser'].level = Log4r::INFO
  end

  it 'must create a file' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.yml')
      DocParser::YAMLOutput.new(filename: filename)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      File.exist?(filename).must_equal true
    end
  end

  it 'must save the header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.yml')
      output = DocParser::YAMLOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.close
      # With a header but no rows the file is expected to stay empty.
      File.read(filename).must_equal ''
    end
  end

  it 'must have a header' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.yml')
      output = DocParser::YAMLOutput.new(filename: filename)
      # Adding a row before any header was set must be rejected.
      -> do
        output.add_row(['aap', 'noot', 'mies'])
      end.must_raise(DocParser::MissingHeaderException)
    end
  end

  it 'must save some rows' do
    Dir.mktmpdir do |dir|
      # Use the .yml extension like the other examples in this spec
      # (this example previously wrote to 'test.csv').
      filename = File.join(dir, 'test.yml')
      output = DocParser::YAMLOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      output.add_row(['a', 'b', 'c'])
      output.add_row(['aap', 'noot', 'mies"'])
      output.add_row(['aap', 'noot']) # testing empty column
      output.close
      # The heredoc body stays at column 0 because its text is compared
      # verbatim; \" inside it evaluates to a plain double quote.
      File.read(filename).must_equal <<-YAMLEND
---
test: a
the: b
header: c
---
test: aap
the: noot
header: mies\"
---
test: aap
the: noot
header: ''
      YAMLEND
    end
  end

  it 'must give the correct rowcount' do
    Dir.mktmpdir do |dir|
      filename = File.join(dir, 'test.yml')
      output = DocParser::YAMLOutput.new(filename: filename)
      output.header = 'test', 'the', 'header'
      # Setting the header is expected not to count as a row.
      output.rowcount.must_equal 0
      output.add_row(['aap', 'noot', 'mies'])
      output.add_row(['aap', 'noot', 'mies'])
      output.rowcount.must_equal 2
    end
  end
end