docparser 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +5 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +9 -1
  6. data/README.md +11 -4
  7. data/Rakefile +15 -0
  8. data/example.rb +9 -7
  9. data/lib/docparser.rb +1 -0
  10. data/lib/docparser/document.rb +18 -11
  11. data/lib/docparser/output.rb +8 -8
  12. data/lib/docparser/output/html_output.rb +53 -47
  13. data/lib/docparser/output/json_output.rb +8 -3
  14. data/lib/docparser/output/multi_output.rb +4 -8
  15. data/lib/docparser/output/nil_output.rb +21 -0
  16. data/lib/docparser/output/screen_output.rb +2 -1
  17. data/lib/docparser/output/xlsx_output.rb +12 -2
  18. data/lib/docparser/output/yaml_output.rb +6 -1
  19. data/lib/docparser/parser.rb +80 -49
  20. data/lib/docparser/version.rb +1 -1
  21. data/test/lib/docparser/blackbox_test.rb +29 -0
  22. data/test/lib/docparser/document_test.rb +134 -0
  23. data/test/lib/docparser/logging_test.rb +19 -0
  24. data/test/lib/docparser/output/csv_output_test.rb +51 -0
  25. data/test/lib/docparser/output/html_output_test.rb +57 -0
  26. data/test/lib/docparser/output/json_output_test.rb +65 -0
  27. data/test/lib/docparser/output/multi_output_test.rb +80 -0
  28. data/test/lib/docparser/output/nil_output_test.rb +27 -0
  29. data/test/lib/docparser/output/screen_output_test.rb +55 -0
  30. data/test/lib/docparser/output/xlsx_output_test.rb +53 -0
  31. data/test/lib/docparser/output/yaml_output_test.rb +76 -0
  32. data/test/lib/docparser/output_test.rb +85 -0
  33. data/test/lib/docparser/parser_test.rb +197 -0
  34. data/test/lib/docparser/version_test.rb +11 -0
  35. data/test/support/hackaday/dl.rb +4 -0
  36. data/test/support/hackaday/file_1.html +716 -0
  37. data/test/support/hackaday/file_10.html +791 -0
  38. data/test/support/hackaday/file_11.html +787 -0
  39. data/test/support/hackaday/file_12.html +715 -0
  40. data/test/support/hackaday/file_13.html +793 -0
  41. data/test/support/hackaday/file_14.html +718 -0
  42. data/test/support/hackaday/file_15.html +707 -0
  43. data/test/support/hackaday/file_16.html +713 -0
  44. data/test/support/hackaday/file_17.html +715 -0
  45. data/test/support/hackaday/file_18.html +725 -0
  46. data/test/support/hackaday/file_19.html +715 -0
  47. data/test/support/hackaday/file_2.html +793 -0
  48. data/test/support/hackaday/file_20.html +795 -0
  49. data/test/support/hackaday/file_21.html +804 -0
  50. data/test/support/hackaday/file_22.html +722 -0
  51. data/test/support/hackaday/file_23.html +793 -0
  52. data/test/support/hackaday/file_24.html +717 -0
  53. data/test/support/hackaday/file_25.html +715 -0
  54. data/test/support/hackaday/file_26.html +717 -0
  55. data/test/support/hackaday/file_27.html +723 -0
  56. data/test/support/hackaday/file_28.html +711 -0
  57. data/test/support/hackaday/file_29.html +711 -0
  58. data/test/support/hackaday/file_3.html +794 -0
  59. data/test/support/hackaday/file_30.html +715 -0
  60. data/test/support/hackaday/file_31.html +713 -0
  61. data/test/support/hackaday/file_32.html +714 -0
  62. data/test/support/hackaday/file_33.html +716 -0
  63. data/test/support/hackaday/file_34.html +714 -0
  64. data/test/support/hackaday/file_35.html +792 -0
  65. data/test/support/hackaday/file_36.html +719 -0
  66. data/test/support/hackaday/file_37.html +712 -0
  67. data/test/support/hackaday/file_38.html +709 -0
  68. data/test/support/hackaday/file_39.html +808 -0
  69. data/test/support/hackaday/file_4.html +814 -0
  70. data/test/support/hackaday/file_40.html +801 -0
  71. data/test/support/hackaday/file_5.html +715 -0
  72. data/test/support/hackaday/file_6.html +792 -0
  73. data/test/support/hackaday/file_7.html +714 -0
  74. data/test/support/hackaday/file_8.html +717 -0
  75. data/test/support/hackaday/file_9.html +719 -0
  76. data/test/support/test_encoding.html +12 -0
  77. data/test/support/test_encoding2.html +12 -0
  78. data/test/support/test_html.html +16 -0
  79. data/test/support/test_xml.xml +5 -0
  80. data/test/test_helper.rb +14 -0
  81. metadata +126 -3
@@ -0,0 +1,12 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Test HTML</title>
5
+ </head>
6
+ <body>
7
+ <article>
8
+ <h1>This is an article</h1>
9
+ <span id="encoding">éÆúøµñüî`÷</span>
10
+ </article>
11
+ </body>
12
+ </html>
@@ -0,0 +1,12 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Test HTML</title>
5
+ </head>
6
+ <body>
7
+ <article>
8
+ <h1>This is an article</h1>
9
+ <span id="encoding">��������`�</span>
10
+ </article>
11
+ </body>
12
+ </html>
@@ -0,0 +1,16 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Test HTML</title>
5
+ </head>
6
+ <body>
7
+ <article>
8
+ <h1>This is an article</h1>
9
+ <p>Great article it is</p>
10
+ <p>This is the last paragraph</p>
11
+ <ul>
12
+ <li>Test HTML</li>
13
+ </ul>
14
+ </article>
15
+ </body>
16
+ </html>
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xmltest>
3
+ <title>Test XML</title>
4
+ <test><![CDATA[Character Data]]></test>
5
+ </xmltest>
@@ -0,0 +1,14 @@
1
+ require 'simplecov'
2
+ SimpleCov.start do
3
+ #add_filter '/test/'
4
+ end
5
+ require 'minitest/autorun'
6
+ require 'minitest/pride'
7
+ require 'tempfile'
8
+ require 'tmpdir'
9
+ require 'simple_mock'
10
+
11
+ require File.expand_path('../lib/docparser.rb', __dir__)
12
+ $TEST_DIR = __dir__
13
+ $ROOT_DIR = File.expand_path('..', $TEST_DIR)
14
+ $SUPPORT_DIR = File.join(__dir__, 'support/')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurriaan Pruis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-11 00:00:00.000000000 Z
11
+ date: 2013-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -46,6 +46,8 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .rubocop.yml
50
+ - .travis.yml
49
51
  - .yardops
50
52
  - Gemfile
51
53
  - LICENSE
@@ -60,11 +62,72 @@ files:
60
62
  - lib/docparser/output/html_output.rb
61
63
  - lib/docparser/output/json_output.rb
62
64
  - lib/docparser/output/multi_output.rb
65
+ - lib/docparser/output/nil_output.rb
63
66
  - lib/docparser/output/screen_output.rb
64
67
  - lib/docparser/output/xlsx_output.rb
65
68
  - lib/docparser/output/yaml_output.rb
66
69
  - lib/docparser/parser.rb
67
70
  - lib/docparser/version.rb
71
+ - test/lib/docparser/blackbox_test.rb
72
+ - test/lib/docparser/document_test.rb
73
+ - test/lib/docparser/logging_test.rb
74
+ - test/lib/docparser/output/csv_output_test.rb
75
+ - test/lib/docparser/output/html_output_test.rb
76
+ - test/lib/docparser/output/json_output_test.rb
77
+ - test/lib/docparser/output/multi_output_test.rb
78
+ - test/lib/docparser/output/nil_output_test.rb
79
+ - test/lib/docparser/output/screen_output_test.rb
80
+ - test/lib/docparser/output/xlsx_output_test.rb
81
+ - test/lib/docparser/output/yaml_output_test.rb
82
+ - test/lib/docparser/output_test.rb
83
+ - test/lib/docparser/parser_test.rb
84
+ - test/lib/docparser/version_test.rb
85
+ - test/support/hackaday/dl.rb
86
+ - test/support/hackaday/file_1.html
87
+ - test/support/hackaday/file_10.html
88
+ - test/support/hackaday/file_11.html
89
+ - test/support/hackaday/file_12.html
90
+ - test/support/hackaday/file_13.html
91
+ - test/support/hackaday/file_14.html
92
+ - test/support/hackaday/file_15.html
93
+ - test/support/hackaday/file_16.html
94
+ - test/support/hackaday/file_17.html
95
+ - test/support/hackaday/file_18.html
96
+ - test/support/hackaday/file_19.html
97
+ - test/support/hackaday/file_2.html
98
+ - test/support/hackaday/file_20.html
99
+ - test/support/hackaday/file_21.html
100
+ - test/support/hackaday/file_22.html
101
+ - test/support/hackaday/file_23.html
102
+ - test/support/hackaday/file_24.html
103
+ - test/support/hackaday/file_25.html
104
+ - test/support/hackaday/file_26.html
105
+ - test/support/hackaday/file_27.html
106
+ - test/support/hackaday/file_28.html
107
+ - test/support/hackaday/file_29.html
108
+ - test/support/hackaday/file_3.html
109
+ - test/support/hackaday/file_30.html
110
+ - test/support/hackaday/file_31.html
111
+ - test/support/hackaday/file_32.html
112
+ - test/support/hackaday/file_33.html
113
+ - test/support/hackaday/file_34.html
114
+ - test/support/hackaday/file_35.html
115
+ - test/support/hackaday/file_36.html
116
+ - test/support/hackaday/file_37.html
117
+ - test/support/hackaday/file_38.html
118
+ - test/support/hackaday/file_39.html
119
+ - test/support/hackaday/file_4.html
120
+ - test/support/hackaday/file_40.html
121
+ - test/support/hackaday/file_5.html
122
+ - test/support/hackaday/file_6.html
123
+ - test/support/hackaday/file_7.html
124
+ - test/support/hackaday/file_8.html
125
+ - test/support/hackaday/file_9.html
126
+ - test/support/test_encoding.html
127
+ - test/support/test_encoding2.html
128
+ - test/support/test_html.html
129
+ - test/support/test_xml.xml
130
+ - test/test_helper.rb
68
131
  homepage: https://github.com/jurriaan/docparser
69
132
  licenses:
70
133
  - MIT
@@ -89,5 +152,65 @@ rubygems_version: 2.0.3
89
152
  signing_key:
90
153
  specification_version: 4
91
154
  summary: DocParser is a Ruby Gem for webscraping
92
- test_files: []
155
+ test_files:
156
+ - test/lib/docparser/blackbox_test.rb
157
+ - test/lib/docparser/document_test.rb
158
+ - test/lib/docparser/logging_test.rb
159
+ - test/lib/docparser/output/csv_output_test.rb
160
+ - test/lib/docparser/output/html_output_test.rb
161
+ - test/lib/docparser/output/json_output_test.rb
162
+ - test/lib/docparser/output/multi_output_test.rb
163
+ - test/lib/docparser/output/nil_output_test.rb
164
+ - test/lib/docparser/output/screen_output_test.rb
165
+ - test/lib/docparser/output/xlsx_output_test.rb
166
+ - test/lib/docparser/output/yaml_output_test.rb
167
+ - test/lib/docparser/output_test.rb
168
+ - test/lib/docparser/parser_test.rb
169
+ - test/lib/docparser/version_test.rb
170
+ - test/support/hackaday/dl.rb
171
+ - test/support/hackaday/file_1.html
172
+ - test/support/hackaday/file_10.html
173
+ - test/support/hackaday/file_11.html
174
+ - test/support/hackaday/file_12.html
175
+ - test/support/hackaday/file_13.html
176
+ - test/support/hackaday/file_14.html
177
+ - test/support/hackaday/file_15.html
178
+ - test/support/hackaday/file_16.html
179
+ - test/support/hackaday/file_17.html
180
+ - test/support/hackaday/file_18.html
181
+ - test/support/hackaday/file_19.html
182
+ - test/support/hackaday/file_2.html
183
+ - test/support/hackaday/file_20.html
184
+ - test/support/hackaday/file_21.html
185
+ - test/support/hackaday/file_22.html
186
+ - test/support/hackaday/file_23.html
187
+ - test/support/hackaday/file_24.html
188
+ - test/support/hackaday/file_25.html
189
+ - test/support/hackaday/file_26.html
190
+ - test/support/hackaday/file_27.html
191
+ - test/support/hackaday/file_28.html
192
+ - test/support/hackaday/file_29.html
193
+ - test/support/hackaday/file_3.html
194
+ - test/support/hackaday/file_30.html
195
+ - test/support/hackaday/file_31.html
196
+ - test/support/hackaday/file_32.html
197
+ - test/support/hackaday/file_33.html
198
+ - test/support/hackaday/file_34.html
199
+ - test/support/hackaday/file_35.html
200
+ - test/support/hackaday/file_36.html
201
+ - test/support/hackaday/file_37.html
202
+ - test/support/hackaday/file_38.html
203
+ - test/support/hackaday/file_39.html
204
+ - test/support/hackaday/file_4.html
205
+ - test/support/hackaday/file_40.html
206
+ - test/support/hackaday/file_5.html
207
+ - test/support/hackaday/file_6.html
208
+ - test/support/hackaday/file_7.html
209
+ - test/support/hackaday/file_8.html
210
+ - test/support/hackaday/file_9.html
211
+ - test/support/test_encoding.html
212
+ - test/support/test_encoding2.html
213
+ - test/support/test_html.html
214
+ - test/support/test_xml.xml
215
+ - test/test_helper.rb
93
216
  has_rdoc: