docparser 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +5 -0
- data/.travis.yml +3 -0
- data/Gemfile +9 -1
- data/README.md +11 -4
- data/Rakefile +15 -0
- data/example.rb +9 -7
- data/lib/docparser.rb +1 -0
- data/lib/docparser/document.rb +18 -11
- data/lib/docparser/output.rb +8 -8
- data/lib/docparser/output/html_output.rb +53 -47
- data/lib/docparser/output/json_output.rb +8 -3
- data/lib/docparser/output/multi_output.rb +4 -8
- data/lib/docparser/output/nil_output.rb +21 -0
- data/lib/docparser/output/screen_output.rb +2 -1
- data/lib/docparser/output/xlsx_output.rb +12 -2
- data/lib/docparser/output/yaml_output.rb +6 -1
- data/lib/docparser/parser.rb +80 -49
- data/lib/docparser/version.rb +1 -1
- data/test/lib/docparser/blackbox_test.rb +29 -0
- data/test/lib/docparser/document_test.rb +134 -0
- data/test/lib/docparser/logging_test.rb +19 -0
- data/test/lib/docparser/output/csv_output_test.rb +51 -0
- data/test/lib/docparser/output/html_output_test.rb +57 -0
- data/test/lib/docparser/output/json_output_test.rb +65 -0
- data/test/lib/docparser/output/multi_output_test.rb +80 -0
- data/test/lib/docparser/output/nil_output_test.rb +27 -0
- data/test/lib/docparser/output/screen_output_test.rb +55 -0
- data/test/lib/docparser/output/xlsx_output_test.rb +53 -0
- data/test/lib/docparser/output/yaml_output_test.rb +76 -0
- data/test/lib/docparser/output_test.rb +85 -0
- data/test/lib/docparser/parser_test.rb +197 -0
- data/test/lib/docparser/version_test.rb +11 -0
- data/test/support/hackaday/dl.rb +4 -0
- data/test/support/hackaday/file_1.html +716 -0
- data/test/support/hackaday/file_10.html +791 -0
- data/test/support/hackaday/file_11.html +787 -0
- data/test/support/hackaday/file_12.html +715 -0
- data/test/support/hackaday/file_13.html +793 -0
- data/test/support/hackaday/file_14.html +718 -0
- data/test/support/hackaday/file_15.html +707 -0
- data/test/support/hackaday/file_16.html +713 -0
- data/test/support/hackaday/file_17.html +715 -0
- data/test/support/hackaday/file_18.html +725 -0
- data/test/support/hackaday/file_19.html +715 -0
- data/test/support/hackaday/file_2.html +793 -0
- data/test/support/hackaday/file_20.html +795 -0
- data/test/support/hackaday/file_21.html +804 -0
- data/test/support/hackaday/file_22.html +722 -0
- data/test/support/hackaday/file_23.html +793 -0
- data/test/support/hackaday/file_24.html +717 -0
- data/test/support/hackaday/file_25.html +715 -0
- data/test/support/hackaday/file_26.html +717 -0
- data/test/support/hackaday/file_27.html +723 -0
- data/test/support/hackaday/file_28.html +711 -0
- data/test/support/hackaday/file_29.html +711 -0
- data/test/support/hackaday/file_3.html +794 -0
- data/test/support/hackaday/file_30.html +715 -0
- data/test/support/hackaday/file_31.html +713 -0
- data/test/support/hackaday/file_32.html +714 -0
- data/test/support/hackaday/file_33.html +716 -0
- data/test/support/hackaday/file_34.html +714 -0
- data/test/support/hackaday/file_35.html +792 -0
- data/test/support/hackaday/file_36.html +719 -0
- data/test/support/hackaday/file_37.html +712 -0
- data/test/support/hackaday/file_38.html +709 -0
- data/test/support/hackaday/file_39.html +808 -0
- data/test/support/hackaday/file_4.html +814 -0
- data/test/support/hackaday/file_40.html +801 -0
- data/test/support/hackaday/file_5.html +715 -0
- data/test/support/hackaday/file_6.html +792 -0
- data/test/support/hackaday/file_7.html +714 -0
- data/test/support/hackaday/file_8.html +717 -0
- data/test/support/hackaday/file_9.html +719 -0
- data/test/support/test_encoding.html +12 -0
- data/test/support/test_encoding2.html +12 -0
- data/test/support/test_html.html +16 -0
- data/test/support/test_xml.xml +5 -0
- data/test/test_helper.rb +14 -0
- metadata +126 -3
@@ -0,0 +1,16 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>Test HTML</title>
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<article>
|
8
|
+
<h1>This is an article</h1>
|
9
|
+
<p>Great article it is</p>
|
10
|
+
<p>This is the last paragraph</p>
|
11
|
+
<ul>
|
12
|
+
<li>Test HTML</li>
|
13
|
+
</ul>
|
14
|
+
</article>
|
15
|
+
</body>
|
16
|
+
</html>
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
SimpleCov.start do
|
3
|
+
#add_filter '/test/'
|
4
|
+
end
|
5
|
+
require 'minitest/autorun'
|
6
|
+
require 'minitest/pride'
|
7
|
+
require 'tempfile'
|
8
|
+
require 'tmpdir'
|
9
|
+
require 'simple_mock'
|
10
|
+
|
11
|
+
require File.expand_path('../lib/docparser.rb', __dir__)
|
12
|
+
$TEST_DIR = __dir__
|
13
|
+
$ROOT_DIR = File.expand_path('..', $TEST_DIR)
|
14
|
+
$SUPPORT_DIR = File.join(__dir__, 'support/')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: docparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurriaan Pruis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -46,6 +46,8 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .rubocop.yml
|
50
|
+
- .travis.yml
|
49
51
|
- .yardops
|
50
52
|
- Gemfile
|
51
53
|
- LICENSE
|
@@ -60,11 +62,72 @@ files:
|
|
60
62
|
- lib/docparser/output/html_output.rb
|
61
63
|
- lib/docparser/output/json_output.rb
|
62
64
|
- lib/docparser/output/multi_output.rb
|
65
|
+
- lib/docparser/output/nil_output.rb
|
63
66
|
- lib/docparser/output/screen_output.rb
|
64
67
|
- lib/docparser/output/xlsx_output.rb
|
65
68
|
- lib/docparser/output/yaml_output.rb
|
66
69
|
- lib/docparser/parser.rb
|
67
70
|
- lib/docparser/version.rb
|
71
|
+
- test/lib/docparser/blackbox_test.rb
|
72
|
+
- test/lib/docparser/document_test.rb
|
73
|
+
- test/lib/docparser/logging_test.rb
|
74
|
+
- test/lib/docparser/output/csv_output_test.rb
|
75
|
+
- test/lib/docparser/output/html_output_test.rb
|
76
|
+
- test/lib/docparser/output/json_output_test.rb
|
77
|
+
- test/lib/docparser/output/multi_output_test.rb
|
78
|
+
- test/lib/docparser/output/nil_output_test.rb
|
79
|
+
- test/lib/docparser/output/screen_output_test.rb
|
80
|
+
- test/lib/docparser/output/xlsx_output_test.rb
|
81
|
+
- test/lib/docparser/output/yaml_output_test.rb
|
82
|
+
- test/lib/docparser/output_test.rb
|
83
|
+
- test/lib/docparser/parser_test.rb
|
84
|
+
- test/lib/docparser/version_test.rb
|
85
|
+
- test/support/hackaday/dl.rb
|
86
|
+
- test/support/hackaday/file_1.html
|
87
|
+
- test/support/hackaday/file_10.html
|
88
|
+
- test/support/hackaday/file_11.html
|
89
|
+
- test/support/hackaday/file_12.html
|
90
|
+
- test/support/hackaday/file_13.html
|
91
|
+
- test/support/hackaday/file_14.html
|
92
|
+
- test/support/hackaday/file_15.html
|
93
|
+
- test/support/hackaday/file_16.html
|
94
|
+
- test/support/hackaday/file_17.html
|
95
|
+
- test/support/hackaday/file_18.html
|
96
|
+
- test/support/hackaday/file_19.html
|
97
|
+
- test/support/hackaday/file_2.html
|
98
|
+
- test/support/hackaday/file_20.html
|
99
|
+
- test/support/hackaday/file_21.html
|
100
|
+
- test/support/hackaday/file_22.html
|
101
|
+
- test/support/hackaday/file_23.html
|
102
|
+
- test/support/hackaday/file_24.html
|
103
|
+
- test/support/hackaday/file_25.html
|
104
|
+
- test/support/hackaday/file_26.html
|
105
|
+
- test/support/hackaday/file_27.html
|
106
|
+
- test/support/hackaday/file_28.html
|
107
|
+
- test/support/hackaday/file_29.html
|
108
|
+
- test/support/hackaday/file_3.html
|
109
|
+
- test/support/hackaday/file_30.html
|
110
|
+
- test/support/hackaday/file_31.html
|
111
|
+
- test/support/hackaday/file_32.html
|
112
|
+
- test/support/hackaday/file_33.html
|
113
|
+
- test/support/hackaday/file_34.html
|
114
|
+
- test/support/hackaday/file_35.html
|
115
|
+
- test/support/hackaday/file_36.html
|
116
|
+
- test/support/hackaday/file_37.html
|
117
|
+
- test/support/hackaday/file_38.html
|
118
|
+
- test/support/hackaday/file_39.html
|
119
|
+
- test/support/hackaday/file_4.html
|
120
|
+
- test/support/hackaday/file_40.html
|
121
|
+
- test/support/hackaday/file_5.html
|
122
|
+
- test/support/hackaday/file_6.html
|
123
|
+
- test/support/hackaday/file_7.html
|
124
|
+
- test/support/hackaday/file_8.html
|
125
|
+
- test/support/hackaday/file_9.html
|
126
|
+
- test/support/test_encoding.html
|
127
|
+
- test/support/test_encoding2.html
|
128
|
+
- test/support/test_html.html
|
129
|
+
- test/support/test_xml.xml
|
130
|
+
- test/test_helper.rb
|
68
131
|
homepage: https://github.com/jurriaan/docparser
|
69
132
|
licenses:
|
70
133
|
- MIT
|
@@ -89,5 +152,65 @@ rubygems_version: 2.0.3
|
|
89
152
|
signing_key:
|
90
153
|
specification_version: 4
|
91
154
|
summary: DocParser is a Ruby Gem for webscraping
|
92
|
-
test_files:
|
155
|
+
test_files:
|
156
|
+
- test/lib/docparser/blackbox_test.rb
|
157
|
+
- test/lib/docparser/document_test.rb
|
158
|
+
- test/lib/docparser/logging_test.rb
|
159
|
+
- test/lib/docparser/output/csv_output_test.rb
|
160
|
+
- test/lib/docparser/output/html_output_test.rb
|
161
|
+
- test/lib/docparser/output/json_output_test.rb
|
162
|
+
- test/lib/docparser/output/multi_output_test.rb
|
163
|
+
- test/lib/docparser/output/nil_output_test.rb
|
164
|
+
- test/lib/docparser/output/screen_output_test.rb
|
165
|
+
- test/lib/docparser/output/xlsx_output_test.rb
|
166
|
+
- test/lib/docparser/output/yaml_output_test.rb
|
167
|
+
- test/lib/docparser/output_test.rb
|
168
|
+
- test/lib/docparser/parser_test.rb
|
169
|
+
- test/lib/docparser/version_test.rb
|
170
|
+
- test/support/hackaday/dl.rb
|
171
|
+
- test/support/hackaday/file_1.html
|
172
|
+
- test/support/hackaday/file_10.html
|
173
|
+
- test/support/hackaday/file_11.html
|
174
|
+
- test/support/hackaday/file_12.html
|
175
|
+
- test/support/hackaday/file_13.html
|
176
|
+
- test/support/hackaday/file_14.html
|
177
|
+
- test/support/hackaday/file_15.html
|
178
|
+
- test/support/hackaday/file_16.html
|
179
|
+
- test/support/hackaday/file_17.html
|
180
|
+
- test/support/hackaday/file_18.html
|
181
|
+
- test/support/hackaday/file_19.html
|
182
|
+
- test/support/hackaday/file_2.html
|
183
|
+
- test/support/hackaday/file_20.html
|
184
|
+
- test/support/hackaday/file_21.html
|
185
|
+
- test/support/hackaday/file_22.html
|
186
|
+
- test/support/hackaday/file_23.html
|
187
|
+
- test/support/hackaday/file_24.html
|
188
|
+
- test/support/hackaday/file_25.html
|
189
|
+
- test/support/hackaday/file_26.html
|
190
|
+
- test/support/hackaday/file_27.html
|
191
|
+
- test/support/hackaday/file_28.html
|
192
|
+
- test/support/hackaday/file_29.html
|
193
|
+
- test/support/hackaday/file_3.html
|
194
|
+
- test/support/hackaday/file_30.html
|
195
|
+
- test/support/hackaday/file_31.html
|
196
|
+
- test/support/hackaday/file_32.html
|
197
|
+
- test/support/hackaday/file_33.html
|
198
|
+
- test/support/hackaday/file_34.html
|
199
|
+
- test/support/hackaday/file_35.html
|
200
|
+
- test/support/hackaday/file_36.html
|
201
|
+
- test/support/hackaday/file_37.html
|
202
|
+
- test/support/hackaday/file_38.html
|
203
|
+
- test/support/hackaday/file_39.html
|
204
|
+
- test/support/hackaday/file_4.html
|
205
|
+
- test/support/hackaday/file_40.html
|
206
|
+
- test/support/hackaday/file_5.html
|
207
|
+
- test/support/hackaday/file_6.html
|
208
|
+
- test/support/hackaday/file_7.html
|
209
|
+
- test/support/hackaday/file_8.html
|
210
|
+
- test/support/hackaday/file_9.html
|
211
|
+
- test/support/test_encoding.html
|
212
|
+
- test/support/test_encoding2.html
|
213
|
+
- test/support/test_html.html
|
214
|
+
- test/support/test_xml.xml
|
215
|
+
- test/test_helper.rb
|
93
216
|
has_rdoc:
|