docparser 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +5 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +9 -1
  6. data/README.md +11 -4
  7. data/Rakefile +15 -0
  8. data/example.rb +9 -7
  9. data/lib/docparser.rb +1 -0
  10. data/lib/docparser/document.rb +18 -11
  11. data/lib/docparser/output.rb +8 -8
  12. data/lib/docparser/output/html_output.rb +53 -47
  13. data/lib/docparser/output/json_output.rb +8 -3
  14. data/lib/docparser/output/multi_output.rb +4 -8
  15. data/lib/docparser/output/nil_output.rb +21 -0
  16. data/lib/docparser/output/screen_output.rb +2 -1
  17. data/lib/docparser/output/xlsx_output.rb +12 -2
  18. data/lib/docparser/output/yaml_output.rb +6 -1
  19. data/lib/docparser/parser.rb +80 -49
  20. data/lib/docparser/version.rb +1 -1
  21. data/test/lib/docparser/blackbox_test.rb +29 -0
  22. data/test/lib/docparser/document_test.rb +134 -0
  23. data/test/lib/docparser/logging_test.rb +19 -0
  24. data/test/lib/docparser/output/csv_output_test.rb +51 -0
  25. data/test/lib/docparser/output/html_output_test.rb +57 -0
  26. data/test/lib/docparser/output/json_output_test.rb +65 -0
  27. data/test/lib/docparser/output/multi_output_test.rb +80 -0
  28. data/test/lib/docparser/output/nil_output_test.rb +27 -0
  29. data/test/lib/docparser/output/screen_output_test.rb +55 -0
  30. data/test/lib/docparser/output/xlsx_output_test.rb +53 -0
  31. data/test/lib/docparser/output/yaml_output_test.rb +76 -0
  32. data/test/lib/docparser/output_test.rb +85 -0
  33. data/test/lib/docparser/parser_test.rb +197 -0
  34. data/test/lib/docparser/version_test.rb +11 -0
  35. data/test/support/hackaday/dl.rb +4 -0
  36. data/test/support/hackaday/file_1.html +716 -0
  37. data/test/support/hackaday/file_10.html +791 -0
  38. data/test/support/hackaday/file_11.html +787 -0
  39. data/test/support/hackaday/file_12.html +715 -0
  40. data/test/support/hackaday/file_13.html +793 -0
  41. data/test/support/hackaday/file_14.html +718 -0
  42. data/test/support/hackaday/file_15.html +707 -0
  43. data/test/support/hackaday/file_16.html +713 -0
  44. data/test/support/hackaday/file_17.html +715 -0
  45. data/test/support/hackaday/file_18.html +725 -0
  46. data/test/support/hackaday/file_19.html +715 -0
  47. data/test/support/hackaday/file_2.html +793 -0
  48. data/test/support/hackaday/file_20.html +795 -0
  49. data/test/support/hackaday/file_21.html +804 -0
  50. data/test/support/hackaday/file_22.html +722 -0
  51. data/test/support/hackaday/file_23.html +793 -0
  52. data/test/support/hackaday/file_24.html +717 -0
  53. data/test/support/hackaday/file_25.html +715 -0
  54. data/test/support/hackaday/file_26.html +717 -0
  55. data/test/support/hackaday/file_27.html +723 -0
  56. data/test/support/hackaday/file_28.html +711 -0
  57. data/test/support/hackaday/file_29.html +711 -0
  58. data/test/support/hackaday/file_3.html +794 -0
  59. data/test/support/hackaday/file_30.html +715 -0
  60. data/test/support/hackaday/file_31.html +713 -0
  61. data/test/support/hackaday/file_32.html +714 -0
  62. data/test/support/hackaday/file_33.html +716 -0
  63. data/test/support/hackaday/file_34.html +714 -0
  64. data/test/support/hackaday/file_35.html +792 -0
  65. data/test/support/hackaday/file_36.html +719 -0
  66. data/test/support/hackaday/file_37.html +712 -0
  67. data/test/support/hackaday/file_38.html +709 -0
  68. data/test/support/hackaday/file_39.html +808 -0
  69. data/test/support/hackaday/file_4.html +814 -0
  70. data/test/support/hackaday/file_40.html +801 -0
  71. data/test/support/hackaday/file_5.html +715 -0
  72. data/test/support/hackaday/file_6.html +792 -0
  73. data/test/support/hackaday/file_7.html +714 -0
  74. data/test/support/hackaday/file_8.html +717 -0
  75. data/test/support/hackaday/file_9.html +719 -0
  76. data/test/support/test_encoding.html +12 -0
  77. data/test/support/test_encoding2.html +12 -0
  78. data/test/support/test_html.html +16 -0
  79. data/test/support/test_xml.xml +5 -0
  80. data/test/test_helper.rb +14 -0
  81. metadata +126 -3
@@ -0,0 +1,51 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::CSVOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'must create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, 'test.csv')
14
+ DocParser::CSVOutput.new(filename: filename)
15
+ File.exists?(filename).must_equal true
16
+ end
17
+ end
18
+
19
+ it 'must save the header' do
20
+ Dir.mktmpdir do |dir|
21
+ filename = File.join(dir, 'test.csv')
22
+ output = DocParser::CSVOutput.new(filename: filename)
23
+ output.header = 'test', 'the', 'header'
24
+ output.close
25
+ open(filename).read.must_equal "test;the;header\n"
26
+ end
27
+ end
28
+
29
+ it 'must save some rows' do
30
+ Dir.mktmpdir do |dir|
31
+ filename = File.join(dir, 'test.csv')
32
+ output = DocParser::CSVOutput.new(filename: filename)
33
+ output.add_row ['aap', 'noot', 'mies']
34
+ output.add_row ['aap', 'noot', 'mies;']
35
+ output.close
36
+ open(filename).read.must_equal "aap;noot;mies\naap;noot;\"mies;\"\n"
37
+ end
38
+ end
39
+
40
+ it 'must give the correct rowcount' do
41
+ Dir.mktmpdir do |dir|
42
+ filename = File.join(dir, 'test.csv')
43
+ output = DocParser::CSVOutput.new(filename: filename)
44
+ output.header = 'test', 'the', 'header'
45
+ output.rowcount.must_equal 0
46
+ output.add_row ['aap', 'noot', 'mies']
47
+ output.add_row ['aap', 'noot', 'mies']
48
+ output.rowcount.must_equal 2
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,57 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::HTMLOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'must create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, 'test.html')
14
+ DocParser::HTMLOutput.new(filename: filename)
15
+ File.exists?(filename).must_equal true
16
+ end
17
+ end
18
+
19
+ it 'must save the header' do
20
+ Dir.mktmpdir do |dir|
21
+ filename = File.join(dir, 'test.html')
22
+ output = DocParser::HTMLOutput.new(filename: filename)
23
+ output.header = 'test', 'the', 'header'
24
+ output.close
25
+ open(filename).read.must_include '<thead><tr><th>test</th><th>the</th>'\
26
+ '<th>header</th></tr></thead>'
27
+ end
28
+ end
29
+
30
+ it 'must save some rows' do
31
+ Dir.mktmpdir do |dir|
32
+ filename = File.join(dir, 'test.html')
33
+ output = DocParser::HTMLOutput.new(filename: filename)
34
+ output.add_row ['aap', 'noot', 'mies']
35
+ output.add_row ['aap', 'noot', 'mies;']
36
+ output.close
37
+ html = open(filename).read
38
+ html.must_include 'tbody'
39
+ html.must_include '<tr><td>aap</td><td>noot</td><td>mies</td></tr>'
40
+ html.must_include '<tr><td>aap</td><td>noot</td><td>mies;</td></tr>'
41
+ end
42
+ end
43
+
44
+ it 'must give the correct rowcount' do
45
+ Dir.mktmpdir do |dir|
46
+ filename = File.join(dir, 'test.html')
47
+ output = DocParser::HTMLOutput.new(filename: filename)
48
+ output.header = 'test', 'the', 'header'
49
+ output.rowcount.must_equal 0
50
+ output.add_row ['aap', 'noot', 'mies']
51
+ output.add_row ['aap', 'noot', 'mies']
52
+ output.rowcount.must_equal 2
53
+ output.close
54
+ open(filename).read.must_include('<p>2 rows</p>')
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,65 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::JSONOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'must create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, 'test.json')
14
+ DocParser::JSONOutput.new(filename: filename)
15
+ File.exists?(filename).must_equal true
16
+ end
17
+ end
18
+
19
+ it 'must save the header' do
20
+ Dir.mktmpdir do |dir|
21
+ filename = File.join(dir, 'test.json')
22
+ output = DocParser::JSONOutput.new(filename: filename)
23
+ output.header = 'test', 'the', 'header'
24
+ output.close
25
+ open(filename).read.must_equal '[]'
26
+ end
27
+ end
28
+
29
+ it 'must have a header' do
30
+ Dir.mktmpdir do |dir|
31
+ filename = File.join(dir, 'test.json')
32
+ output = DocParser::JSONOutput.new(filename: filename)
33
+ -> do
34
+ output.add_row ['aap', 'noot', 'mies']
35
+ end.must_raise(DocParser::MissingHeaderException)
36
+ end
37
+ end
38
+
39
+ it 'must save some rows' do
40
+ Dir.mktmpdir do |dir|
41
+ filename = File.join(dir, 'test.json')
42
+ output = DocParser::JSONOutput.new(filename: filename)
43
+ output.header = 'test', 'the', 'header'
44
+ output.add_row ['a', 'b', 'c']
45
+ output.add_row ['aap', 'noot', 'mies"']
46
+ output.add_row ['aap', 'noot'] # testing empty column
47
+ output.close
48
+ open(filename).read.must_equal '[{"test":"a","the":"b","header":"c"}'\
49
+ ',{"test":"aap","the":"noot","header":"mies\""}'\
50
+ ',{"test":"aap","the":"noot","header":""}]'
51
+ end
52
+ end
53
+
54
+ it 'must give the correct rowcount' do
55
+ Dir.mktmpdir do |dir|
56
+ filename = File.join(dir, 'test.json')
57
+ output = DocParser::JSONOutput.new(filename: filename)
58
+ output.header = 'test', 'the', 'header'
59
+ output.rowcount.must_equal 0
60
+ output.add_row ['aap', 'noot', 'mies']
61
+ output.add_row ['aap', 'noot', 'mies']
62
+ output.rowcount.must_equal 2
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,80 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::MultiOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'must create files' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, 'test')
14
+ DocParser::MultiOutput.new(filename: filename)
15
+
16
+ File.exists?(filename).must_equal false
17
+ ['.csv', '.html', '.yml', '.xlsx', '.json'].each do |ext|
18
+ File.exists?(filename + ext).must_equal true
19
+ end
20
+ end
21
+ end
22
+
23
+ it 'must save the header' do
24
+ Dir.mktmpdir do |dir|
25
+ filename = File.join(dir, 'test')
26
+ output = DocParser::MultiOutput.new(filename: filename)
27
+ output.header = 'test', 'the', 'header'
28
+ output.close
29
+ open(filename + '.yml').read.must_equal ''
30
+ open(filename + '.csv').read.must_equal "test;the;header\n"
31
+ end
32
+ end
33
+
34
+ it 'must have a header' do
35
+ Dir.mktmpdir do |dir|
36
+ filename = File.join(dir, 'test')
37
+ output = DocParser::MultiOutput.new(filename: filename)
38
+ -> do
39
+ output.add_row ['aap', 'noot', 'mies']
40
+ end.must_raise(DocParser::MissingHeaderException)
41
+ end
42
+ end
43
+
44
+ it 'must give the correct rowcount' do
45
+ Dir.mktmpdir do |dir|
46
+ filename = File.join(dir, 'test')
47
+ output = DocParser::MultiOutput.new(filename: filename)
48
+ output.header = 'test', 'the', 'header'
49
+ output.rowcount.must_equal 0
50
+ output.add_row ['aap', 'noot', 'mies']
51
+ output.add_row ['aap', 'noot', 'mies']
52
+ output.rowcount.must_equal 2
53
+ end
54
+ end
55
+
56
+ it 'must delegate methods' do
57
+ Dir.mktmpdir do |dir|
58
+ filename = File.join(dir, 'test')
59
+ output = DocParser::MultiOutput.new(filename: filename)
60
+ methods = [:add_row, :header=, :close]
61
+ outputs = output.instance_variable_get(:@outputs)
62
+ outputs.map! do |o|
63
+ SimpleMock.new o
64
+ end
65
+ output.instance_variable_set(:@outputs, outputs)
66
+ methods.each do |method_name|
67
+ method = output.method(method_name)
68
+ arity = method.arity
69
+ outputs.map do |o|
70
+ o.expect(method_name, nil, [nil] * arity)
71
+ end
72
+ output.send(method_name, *([nil] * arity))
73
+ end
74
+ outputs.map do |o|
75
+ o.verify.must_equal true
76
+ end
77
+ end
78
+ end
79
+
80
+ end
@@ -0,0 +1,27 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::NilOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'should not create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, '*')
14
+ DocParser::NilOutput.new
15
+ Dir[filename].must_be_empty
16
+ end
17
+ end
18
+
19
+ it 'must give the correct rowcount' do
20
+ output = DocParser::NilOutput.new
21
+ output.header = 'test', 'the', 'header'
22
+ output.rowcount.must_equal 0
23
+ output.add_row ['aap', 'noot', 'mies']
24
+ output.add_row ['aap', 'noot', 'mies']
25
+ output.rowcount.must_equal 0
26
+ end
27
+ end
@@ -0,0 +1,55 @@
1
+ require_relative '../../../test_helper'
2
+ require 'stringio'
3
+ describe DocParser::ScreenOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'should not create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, '*')
14
+ DocParser::ScreenOutput.new
15
+ Dir[filename].must_be_empty
16
+ end
17
+ end
18
+
19
+ it 'must give the correct rowcount' do
20
+ output = DocParser::ScreenOutput.new
21
+ output.header = 'test', 'the', 'header'
22
+ output.rowcount.must_equal 0
23
+ output.add_row ['aap', 'noot', 'mies']
24
+ output.add_row ['aap', 'noot', 'mies']
25
+ output.rowcount.must_equal 2
26
+ end
27
+
28
+ it 'must have a header' do
29
+ output = DocParser::ScreenOutput.new
30
+ -> do
31
+ output.add_row ['aap', 'noot', 'mies']
32
+ end.must_raise(DocParser::MissingHeaderException)
33
+ end
34
+
35
+
36
+ it 'must output the data after close' do
37
+ $out = StringIO.new
38
+ output = Class.new DocParser::ScreenOutput do
39
+ def page(*args, &p)
40
+ args << p
41
+ args.compact!
42
+ page_to $out, args
43
+ end
44
+ end.new
45
+ output.header = 'test', 'the', 'header'
46
+ output.add_row ['aap1', '', 'mies']
47
+ output.add_row ['aap2', 'mies1']
48
+ output.close
49
+ out = $out.string
50
+ out.must_include 'header'
51
+ out.must_include 'aap1'
52
+ out.must_include 'mies1'
53
+ out.must_include 'mies'
54
+ end
55
+ end
@@ -0,0 +1,53 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::XLSXOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'must create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, 'test.xlsx')
14
+ DocParser::XLSXOutput.new(filename: filename)
15
+ File.exists?(filename).must_equal true
16
+ end
17
+ end
18
+
19
+ it 'must save the header' do
20
+ Dir.mktmpdir do |dir|
21
+ filename = File.join(dir, 'test.xlsx')
22
+ output = DocParser::XLSXOutput.new(filename: filename)
23
+ output.header = 'test', 'the', 'header'
24
+ output.close
25
+ sheet = output.instance_variable_get(:@sheet)
26
+ sheet.rows.length.must_equal(1)
27
+ end
28
+ end
29
+
30
+ it 'must save some rows' do
31
+ Dir.mktmpdir do |dir|
32
+ filename = File.join(dir, 'test.xlsx')
33
+ output = DocParser::XLSXOutput.new(filename: filename)
34
+ output.add_row ['aap', 'noot', 'mies']
35
+ output.add_row ['aap', 'noot', 'mies;']
36
+ output.close
37
+ sheet = output.instance_variable_get(:@sheet)
38
+ sheet.rows.length.must_equal(2)
39
+ end
40
+ end
41
+
42
+ it 'must give the correct rowcount' do
43
+ Dir.mktmpdir do |dir|
44
+ filename = File.join(dir, 'test.xlsx')
45
+ output = DocParser::XLSXOutput.new(filename: filename)
46
+ output.header = 'test', 'the', 'header'
47
+ output.rowcount.must_equal 0
48
+ output.add_row ['aap', 'noot', 'mies']
49
+ output.add_row ['aap', 'noot', 'mies']
50
+ output.rowcount.must_equal 2
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,76 @@
1
+ require_relative '../../../test_helper'
2
+
3
+ describe DocParser::YAMLOutput do
4
+ before do
5
+ Log4r::Logger['docparser'].level = Log4r::ERROR
6
+ end
7
+ after do
8
+ Log4r::Logger['docparser'].level = Log4r::INFO
9
+ end
10
+
11
+ it 'must create a file' do
12
+ Dir.mktmpdir do |dir|
13
+ filename = File.join(dir, 'test.yml')
14
+ DocParser::YAMLOutput.new(filename: filename)
15
+ File.exists?(filename).must_equal true
16
+ end
17
+ end
18
+
19
+ it 'must save the header' do
20
+ Dir.mktmpdir do |dir|
21
+ filename = File.join(dir, 'test.yml')
22
+ output = DocParser::YAMLOutput.new(filename: filename)
23
+ output.header = 'test', 'the', 'header'
24
+ output.close
25
+ open(filename).read.must_equal ''
26
+ end
27
+ end
28
+
29
+ it 'must have a header' do
30
+ Dir.mktmpdir do |dir|
31
+ filename = File.join(dir, 'test.yml')
32
+ output = DocParser::YAMLOutput.new(filename: filename)
33
+ -> do
34
+ output.add_row ['aap', 'noot', 'mies']
35
+ end.must_raise(DocParser::MissingHeaderException)
36
+ end
37
+ end
38
+
39
+ it 'must save some rows' do
40
+ Dir.mktmpdir do |dir|
41
+ filename = File.join(dir, 'test.csv')
42
+ output = DocParser::YAMLOutput.new(filename: filename)
43
+ output.header = 'test', 'the', 'header'
44
+ output.add_row ['a', 'b', 'c']
45
+ output.add_row ['aap', 'noot', 'mies"']
46
+ output.add_row ['aap', 'noot'] # testing empty column
47
+ output.close
48
+ open(filename).read.must_equal <<-YAMLEND
49
+ ---
50
+ test: a
51
+ the: b
52
+ header: c
53
+ ---
54
+ test: aap
55
+ the: noot
56
+ header: mies\"
57
+ ---
58
+ test: aap
59
+ the: noot
60
+ header: ''
61
+ YAMLEND
62
+ end
63
+ end
64
+
65
+ it 'must give the correct rowcount' do
66
+ Dir.mktmpdir do |dir|
67
+ filename = File.join(dir, 'test.yml')
68
+ output = DocParser::YAMLOutput.new(filename: filename)
69
+ output.header = 'test', 'the', 'header'
70
+ output.rowcount.must_equal 0
71
+ output.add_row ['aap', 'noot', 'mies']
72
+ output.add_row ['aap', 'noot', 'mies']
73
+ output.rowcount.must_equal 2
74
+ end
75
+ end
76
+ end