docparser 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +5 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +9 -1
  6. data/README.md +11 -4
  7. data/Rakefile +15 -0
  8. data/example.rb +9 -7
  9. data/lib/docparser.rb +1 -0
  10. data/lib/docparser/document.rb +18 -11
  11. data/lib/docparser/output.rb +8 -8
  12. data/lib/docparser/output/html_output.rb +53 -47
  13. data/lib/docparser/output/json_output.rb +8 -3
  14. data/lib/docparser/output/multi_output.rb +4 -8
  15. data/lib/docparser/output/nil_output.rb +21 -0
  16. data/lib/docparser/output/screen_output.rb +2 -1
  17. data/lib/docparser/output/xlsx_output.rb +12 -2
  18. data/lib/docparser/output/yaml_output.rb +6 -1
  19. data/lib/docparser/parser.rb +80 -49
  20. data/lib/docparser/version.rb +1 -1
  21. data/test/lib/docparser/blackbox_test.rb +29 -0
  22. data/test/lib/docparser/document_test.rb +134 -0
  23. data/test/lib/docparser/logging_test.rb +19 -0
  24. data/test/lib/docparser/output/csv_output_test.rb +51 -0
  25. data/test/lib/docparser/output/html_output_test.rb +57 -0
  26. data/test/lib/docparser/output/json_output_test.rb +65 -0
  27. data/test/lib/docparser/output/multi_output_test.rb +80 -0
  28. data/test/lib/docparser/output/nil_output_test.rb +27 -0
  29. data/test/lib/docparser/output/screen_output_test.rb +55 -0
  30. data/test/lib/docparser/output/xlsx_output_test.rb +53 -0
  31. data/test/lib/docparser/output/yaml_output_test.rb +76 -0
  32. data/test/lib/docparser/output_test.rb +85 -0
  33. data/test/lib/docparser/parser_test.rb +197 -0
  34. data/test/lib/docparser/version_test.rb +11 -0
  35. data/test/support/hackaday/dl.rb +4 -0
  36. data/test/support/hackaday/file_1.html +716 -0
  37. data/test/support/hackaday/file_10.html +791 -0
  38. data/test/support/hackaday/file_11.html +787 -0
  39. data/test/support/hackaday/file_12.html +715 -0
  40. data/test/support/hackaday/file_13.html +793 -0
  41. data/test/support/hackaday/file_14.html +718 -0
  42. data/test/support/hackaday/file_15.html +707 -0
  43. data/test/support/hackaday/file_16.html +713 -0
  44. data/test/support/hackaday/file_17.html +715 -0
  45. data/test/support/hackaday/file_18.html +725 -0
  46. data/test/support/hackaday/file_19.html +715 -0
  47. data/test/support/hackaday/file_2.html +793 -0
  48. data/test/support/hackaday/file_20.html +795 -0
  49. data/test/support/hackaday/file_21.html +804 -0
  50. data/test/support/hackaday/file_22.html +722 -0
  51. data/test/support/hackaday/file_23.html +793 -0
  52. data/test/support/hackaday/file_24.html +717 -0
  53. data/test/support/hackaday/file_25.html +715 -0
  54. data/test/support/hackaday/file_26.html +717 -0
  55. data/test/support/hackaday/file_27.html +723 -0
  56. data/test/support/hackaday/file_28.html +711 -0
  57. data/test/support/hackaday/file_29.html +711 -0
  58. data/test/support/hackaday/file_3.html +794 -0
  59. data/test/support/hackaday/file_30.html +715 -0
  60. data/test/support/hackaday/file_31.html +713 -0
  61. data/test/support/hackaday/file_32.html +714 -0
  62. data/test/support/hackaday/file_33.html +716 -0
  63. data/test/support/hackaday/file_34.html +714 -0
  64. data/test/support/hackaday/file_35.html +792 -0
  65. data/test/support/hackaday/file_36.html +719 -0
  66. data/test/support/hackaday/file_37.html +712 -0
  67. data/test/support/hackaday/file_38.html +709 -0
  68. data/test/support/hackaday/file_39.html +808 -0
  69. data/test/support/hackaday/file_4.html +814 -0
  70. data/test/support/hackaday/file_40.html +801 -0
  71. data/test/support/hackaday/file_5.html +715 -0
  72. data/test/support/hackaday/file_6.html +792 -0
  73. data/test/support/hackaday/file_7.html +714 -0
  74. data/test/support/hackaday/file_8.html +717 -0
  75. data/test/support/hackaday/file_9.html +719 -0
  76. data/test/support/test_encoding.html +12 -0
  77. data/test/support/test_encoding2.html +12 -0
  78. data/test/support/test_html.html +16 -0
  79. data/test/support/test_xml.xml +5 -0
  80. data/test/test_helper.rb +14 -0
  81. metadata +126 -3
@@ -0,0 +1,12 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Test HTML</title>
5
+ </head>
6
+ <body>
7
+ <article>
8
+ <h1>This is an article</h1>
9
+ <span id="encoding">éÆúøµñüî`÷</span>
10
+ </article>
11
+ </body>
12
+ </html>
@@ -0,0 +1,12 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Test HTML</title>
5
+ </head>
6
+ <body>
7
+ <article>
8
+ <h1>This is an article</h1>
9
+ <span id="encoding">��������`�</span>
10
+ </article>
11
+ </body>
12
+ </html>
@@ -0,0 +1,16 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Test HTML</title>
5
+ </head>
6
+ <body>
7
+ <article>
8
+ <h1>This is an article</h1>
9
+ <p>Great article it is</p>
10
+ <p>This is the last paragraph</p>
11
+ <ul>
12
+ <li>Test HTML</li>
13
+ </ul>
14
+ </article>
15
+ </body>
16
+ </html>
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xmltest>
3
+ <title>Test XML</title>
4
+ <test><![CDATA[Character Data]]></test>
5
+ </xmltest>
@@ -0,0 +1,14 @@
1
+ require 'simplecov'
2
+ SimpleCov.start do
3
+ #add_filter '/test/'
4
+ end
5
+ require 'minitest/autorun'
6
+ require 'minitest/pride'
7
+ require 'tempfile'
8
+ require 'tmpdir'
9
+ require 'simple_mock'
10
+
11
+ require File.expand_path('../lib/docparser.rb', __dir__)
12
+ $TEST_DIR = __dir__
13
+ $ROOT_DIR = File.expand_path('..', $TEST_DIR)
14
+ $SUPPORT_DIR = File.join(__dir__, 'support/')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurriaan Pruis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-11 00:00:00.000000000 Z
11
+ date: 2013-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -46,6 +46,8 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .rubocop.yml
50
+ - .travis.yml
49
51
  - .yardops
50
52
  - Gemfile
51
53
  - LICENSE
@@ -60,11 +62,72 @@ files:
60
62
  - lib/docparser/output/html_output.rb
61
63
  - lib/docparser/output/json_output.rb
62
64
  - lib/docparser/output/multi_output.rb
65
+ - lib/docparser/output/nil_output.rb
63
66
  - lib/docparser/output/screen_output.rb
64
67
  - lib/docparser/output/xlsx_output.rb
65
68
  - lib/docparser/output/yaml_output.rb
66
69
  - lib/docparser/parser.rb
67
70
  - lib/docparser/version.rb
71
+ - test/lib/docparser/blackbox_test.rb
72
+ - test/lib/docparser/document_test.rb
73
+ - test/lib/docparser/logging_test.rb
74
+ - test/lib/docparser/output/csv_output_test.rb
75
+ - test/lib/docparser/output/html_output_test.rb
76
+ - test/lib/docparser/output/json_output_test.rb
77
+ - test/lib/docparser/output/multi_output_test.rb
78
+ - test/lib/docparser/output/nil_output_test.rb
79
+ - test/lib/docparser/output/screen_output_test.rb
80
+ - test/lib/docparser/output/xlsx_output_test.rb
81
+ - test/lib/docparser/output/yaml_output_test.rb
82
+ - test/lib/docparser/output_test.rb
83
+ - test/lib/docparser/parser_test.rb
84
+ - test/lib/docparser/version_test.rb
85
+ - test/support/hackaday/dl.rb
86
+ - test/support/hackaday/file_1.html
87
+ - test/support/hackaday/file_10.html
88
+ - test/support/hackaday/file_11.html
89
+ - test/support/hackaday/file_12.html
90
+ - test/support/hackaday/file_13.html
91
+ - test/support/hackaday/file_14.html
92
+ - test/support/hackaday/file_15.html
93
+ - test/support/hackaday/file_16.html
94
+ - test/support/hackaday/file_17.html
95
+ - test/support/hackaday/file_18.html
96
+ - test/support/hackaday/file_19.html
97
+ - test/support/hackaday/file_2.html
98
+ - test/support/hackaday/file_20.html
99
+ - test/support/hackaday/file_21.html
100
+ - test/support/hackaday/file_22.html
101
+ - test/support/hackaday/file_23.html
102
+ - test/support/hackaday/file_24.html
103
+ - test/support/hackaday/file_25.html
104
+ - test/support/hackaday/file_26.html
105
+ - test/support/hackaday/file_27.html
106
+ - test/support/hackaday/file_28.html
107
+ - test/support/hackaday/file_29.html
108
+ - test/support/hackaday/file_3.html
109
+ - test/support/hackaday/file_30.html
110
+ - test/support/hackaday/file_31.html
111
+ - test/support/hackaday/file_32.html
112
+ - test/support/hackaday/file_33.html
113
+ - test/support/hackaday/file_34.html
114
+ - test/support/hackaday/file_35.html
115
+ - test/support/hackaday/file_36.html
116
+ - test/support/hackaday/file_37.html
117
+ - test/support/hackaday/file_38.html
118
+ - test/support/hackaday/file_39.html
119
+ - test/support/hackaday/file_4.html
120
+ - test/support/hackaday/file_40.html
121
+ - test/support/hackaday/file_5.html
122
+ - test/support/hackaday/file_6.html
123
+ - test/support/hackaday/file_7.html
124
+ - test/support/hackaday/file_8.html
125
+ - test/support/hackaday/file_9.html
126
+ - test/support/test_encoding.html
127
+ - test/support/test_encoding2.html
128
+ - test/support/test_html.html
129
+ - test/support/test_xml.xml
130
+ - test/test_helper.rb
68
131
  homepage: https://github.com/jurriaan/docparser
69
132
  licenses:
70
133
  - MIT
@@ -89,5 +152,65 @@ rubygems_version: 2.0.3
89
152
  signing_key:
90
153
  specification_version: 4
91
154
  summary: DocParser is a Ruby Gem for webscraping
92
- test_files: []
155
+ test_files:
156
+ - test/lib/docparser/blackbox_test.rb
157
+ - test/lib/docparser/document_test.rb
158
+ - test/lib/docparser/logging_test.rb
159
+ - test/lib/docparser/output/csv_output_test.rb
160
+ - test/lib/docparser/output/html_output_test.rb
161
+ - test/lib/docparser/output/json_output_test.rb
162
+ - test/lib/docparser/output/multi_output_test.rb
163
+ - test/lib/docparser/output/nil_output_test.rb
164
+ - test/lib/docparser/output/screen_output_test.rb
165
+ - test/lib/docparser/output/xlsx_output_test.rb
166
+ - test/lib/docparser/output/yaml_output_test.rb
167
+ - test/lib/docparser/output_test.rb
168
+ - test/lib/docparser/parser_test.rb
169
+ - test/lib/docparser/version_test.rb
170
+ - test/support/hackaday/dl.rb
171
+ - test/support/hackaday/file_1.html
172
+ - test/support/hackaday/file_10.html
173
+ - test/support/hackaday/file_11.html
174
+ - test/support/hackaday/file_12.html
175
+ - test/support/hackaday/file_13.html
176
+ - test/support/hackaday/file_14.html
177
+ - test/support/hackaday/file_15.html
178
+ - test/support/hackaday/file_16.html
179
+ - test/support/hackaday/file_17.html
180
+ - test/support/hackaday/file_18.html
181
+ - test/support/hackaday/file_19.html
182
+ - test/support/hackaday/file_2.html
183
+ - test/support/hackaday/file_20.html
184
+ - test/support/hackaday/file_21.html
185
+ - test/support/hackaday/file_22.html
186
+ - test/support/hackaday/file_23.html
187
+ - test/support/hackaday/file_24.html
188
+ - test/support/hackaday/file_25.html
189
+ - test/support/hackaday/file_26.html
190
+ - test/support/hackaday/file_27.html
191
+ - test/support/hackaday/file_28.html
192
+ - test/support/hackaday/file_29.html
193
+ - test/support/hackaday/file_3.html
194
+ - test/support/hackaday/file_30.html
195
+ - test/support/hackaday/file_31.html
196
+ - test/support/hackaday/file_32.html
197
+ - test/support/hackaday/file_33.html
198
+ - test/support/hackaday/file_34.html
199
+ - test/support/hackaday/file_35.html
200
+ - test/support/hackaday/file_36.html
201
+ - test/support/hackaday/file_37.html
202
+ - test/support/hackaday/file_38.html
203
+ - test/support/hackaday/file_39.html
204
+ - test/support/hackaday/file_4.html
205
+ - test/support/hackaday/file_40.html
206
+ - test/support/hackaday/file_5.html
207
+ - test/support/hackaday/file_6.html
208
+ - test/support/hackaday/file_7.html
209
+ - test/support/hackaday/file_8.html
210
+ - test/support/hackaday/file_9.html
211
+ - test/support/test_encoding.html
212
+ - test/support/test_encoding2.html
213
+ - test/support/test_html.html
214
+ - test/support/test_xml.xml
215
+ - test/test_helper.rb
93
216
  has_rdoc: