yasuri 3.0.0 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/.rubocop.yml +49 -0
- data/.rubocop_todo.yml +0 -0
- data/README.md +70 -27
- data/Rakefile +1 -1
- data/USAGE.ja.md +366 -131
- data/USAGE.md +371 -136
- data/examples/example.rb +78 -0
- data/examples/github.yml +15 -0
- data/examples/sample.json +4 -0
- data/examples/sample.yml +11 -0
- data/exe/yasuri +5 -0
- data/lib/yasuri.rb +1 -0
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +96 -76
- data/lib/yasuri/yasuri_cli.rb +78 -0
- data/lib/yasuri/yasuri_links_node.rb +10 -6
- data/lib/yasuri/yasuri_map_node.rb +40 -0
- data/lib/yasuri/yasuri_node.rb +36 -4
- data/lib/yasuri/yasuri_node_generator.rb +14 -9
- data/lib/yasuri/yasuri_paginate_node.rb +26 -16
- data/lib/yasuri/yasuri_struct_node.rb +6 -4
- data/lib/yasuri/yasuri_text_node.rb +9 -7
- data/spec/cli_resources/tree.json +8 -0
- data/spec/cli_resources/tree.yml +5 -0
- data/spec/cli_resources/tree_wrong.json +9 -0
- data/spec/cli_resources/tree_wrong.yml +6 -0
- data/spec/servers/httpserver.rb +0 -2
- data/spec/spec_helper.rb +4 -6
- data/spec/yasuri_cli_spec.rb +114 -0
- data/spec/yasuri_links_node_spec.rb +82 -58
- data/spec/yasuri_map_spec.rb +71 -0
- data/spec/yasuri_paginate_node_spec.rb +99 -88
- data/spec/yasuri_spec.rb +196 -138
- data/spec/yasuri_struct_node_spec.rb +120 -100
- data/spec/yasuri_text_node_spec.rb +22 -32
- data/yasuri.gemspec +29 -22
- metadata +105 -15
- data/app.rb +0 -52
- data/spec/yasuri_node_spec.rb +0 -11
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: codeclimate-test-reporter
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: coveralls
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: rake
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: rspec
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,63 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: rubocop
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop-performance
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rubocop-rspec
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubocop-rubycw
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: simplecov
|
113
169
|
requirement: !ruby/object:Gem::Requirement
|
114
170
|
requirements:
|
115
171
|
- - ">="
|
@@ -136,10 +192,26 @@ dependencies:
|
|
136
192
|
- - ">="
|
137
193
|
- !ruby/object:Gem::Version
|
138
194
|
version: '0'
|
139
|
-
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: thor
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - ">="
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0'
|
202
|
+
type: :runtime
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - ">="
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0'
|
209
|
+
description: Yasuri (鑢) is a library for declarative web scraping and a command line
|
210
|
+
tool for scraping with it.
|
140
211
|
email:
|
141
212
|
- tac@tac42.net
|
142
|
-
executables:
|
213
|
+
executables:
|
214
|
+
- yasuri
|
143
215
|
extensions: []
|
144
216
|
extra_rdoc_files: []
|
145
217
|
files:
|
@@ -147,6 +219,8 @@ files:
|
|
147
219
|
- ".github/workflows/ruby.yml"
|
148
220
|
- ".gitignore"
|
149
221
|
- ".rspec"
|
222
|
+
- ".rubocop.yml"
|
223
|
+
- ".rubocop_todo.yml"
|
150
224
|
- ".ruby-version"
|
151
225
|
- ".travis.yml"
|
152
226
|
- Gemfile
|
@@ -155,16 +229,26 @@ files:
|
|
155
229
|
- Rakefile
|
156
230
|
- USAGE.ja.md
|
157
231
|
- USAGE.md
|
158
|
-
-
|
232
|
+
- examples/example.rb
|
233
|
+
- examples/github.yml
|
234
|
+
- examples/sample.json
|
235
|
+
- examples/sample.yml
|
236
|
+
- exe/yasuri
|
159
237
|
- lib/yasuri.rb
|
160
238
|
- lib/yasuri/version.rb
|
161
239
|
- lib/yasuri/yasuri.rb
|
240
|
+
- lib/yasuri/yasuri_cli.rb
|
162
241
|
- lib/yasuri/yasuri_links_node.rb
|
242
|
+
- lib/yasuri/yasuri_map_node.rb
|
163
243
|
- lib/yasuri/yasuri_node.rb
|
164
244
|
- lib/yasuri/yasuri_node_generator.rb
|
165
245
|
- lib/yasuri/yasuri_paginate_node.rb
|
166
246
|
- lib/yasuri/yasuri_struct_node.rb
|
167
247
|
- lib/yasuri/yasuri_text_node.rb
|
248
|
+
- spec/cli_resources/tree.json
|
249
|
+
- spec/cli_resources/tree.yml
|
250
|
+
- spec/cli_resources/tree_wrong.json
|
251
|
+
- spec/cli_resources/tree_wrong.yml
|
168
252
|
- spec/htdocs/child01.html
|
169
253
|
- spec/htdocs/child01_sub.html
|
170
254
|
- spec/htdocs/child02.html
|
@@ -180,8 +264,9 @@ files:
|
|
180
264
|
- spec/htdocs/struct/structual_text.html
|
181
265
|
- spec/servers/httpserver.rb
|
182
266
|
- spec/spec_helper.rb
|
267
|
+
- spec/yasuri_cli_spec.rb
|
183
268
|
- spec/yasuri_links_node_spec.rb
|
184
|
-
- spec/
|
269
|
+
- spec/yasuri_map_spec.rb
|
185
270
|
- spec/yasuri_paginate_node_spec.rb
|
186
271
|
- spec/yasuri_spec.rb
|
187
272
|
- spec/yasuri_struct_node_spec.rb
|
@@ -199,7 +284,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
199
284
|
requirements:
|
200
285
|
- - ">="
|
201
286
|
- !ruby/object:Gem::Version
|
202
|
-
version:
|
287
|
+
version: 2.7.0
|
203
288
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
204
289
|
requirements:
|
205
290
|
- - ">="
|
@@ -209,8 +294,12 @@ requirements: []
|
|
209
294
|
rubygems_version: 3.2.3
|
210
295
|
signing_key:
|
211
296
|
specification_version: 4
|
212
|
-
summary: Yasuri is
|
297
|
+
summary: Yasuri (鑢) is a library for declarative web scraping and cli.
|
213
298
|
test_files:
|
299
|
+
- spec/cli_resources/tree.json
|
300
|
+
- spec/cli_resources/tree.yml
|
301
|
+
- spec/cli_resources/tree_wrong.json
|
302
|
+
- spec/cli_resources/tree_wrong.yml
|
214
303
|
- spec/htdocs/child01.html
|
215
304
|
- spec/htdocs/child01_sub.html
|
216
305
|
- spec/htdocs/child02.html
|
@@ -226,8 +315,9 @@ test_files:
|
|
226
315
|
- spec/htdocs/struct/structual_text.html
|
227
316
|
- spec/servers/httpserver.rb
|
228
317
|
- spec/spec_helper.rb
|
318
|
+
- spec/yasuri_cli_spec.rb
|
229
319
|
- spec/yasuri_links_node_spec.rb
|
230
|
-
- spec/
|
320
|
+
- spec/yasuri_map_spec.rb
|
231
321
|
- spec/yasuri_paginate_node_spec.rb
|
232
322
|
- spec/yasuri_spec.rb
|
233
323
|
- spec/yasuri_struct_node_spec.rb
|
data/app.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# Author:: TAC (tac@tac42.net)
|
5
|
-
|
6
|
-
require 'pp'
|
7
|
-
require 'time'
|
8
|
-
require 'mechanize'
|
9
|
-
|
10
|
-
require_relative 'lib/yasuri/yasuri'
|
11
|
-
|
12
|
-
agent = Mechanize.new
|
13
|
-
|
14
|
-
uri = "http://www.asahi.com/"
|
15
|
-
|
16
|
-
# Node tree constructing by DSL
|
17
|
-
root = Yasuri.links_top '//*[@id="MainInner"]/div[1]/ul/li/a' do
|
18
|
-
text_title '//*[@id="MainInner"]/div[1]/div/h1'
|
19
|
-
text_article '//*[@id="MainInner"]/div/div[@class="ArticleText"]'
|
20
|
-
end
|
21
|
-
|
22
|
-
# Node tree constructing by JSON
|
23
|
-
src = <<-EOJSON
|
24
|
-
{ "node" : "links",
|
25
|
-
"name" : "root",
|
26
|
-
"path" : "//*[@id='MainInner']/div[1]/ul/li/a",
|
27
|
-
"children" : [
|
28
|
-
{ "node" : "text",
|
29
|
-
"name" : "title",
|
30
|
-
"path" : "//*[@id='MainInner']/div[1]/div/h1"
|
31
|
-
},
|
32
|
-
{ "node" : "text",
|
33
|
-
"name" : "article",
|
34
|
-
"path" : "//*[@id='MainInner']/div/div[@class='ArticleText']"
|
35
|
-
}
|
36
|
-
]
|
37
|
-
}
|
38
|
-
EOJSON
|
39
|
-
root = Yasuri.json2tree(src)
|
40
|
-
|
41
|
-
# Access to parsed resources
|
42
|
-
page = agent.get(uri)
|
43
|
-
contents = root.inject(agent, page)
|
44
|
-
|
45
|
-
contents.each do |h|
|
46
|
-
t = h['title']
|
47
|
-
a = h['article']
|
48
|
-
|
49
|
-
puts t
|
50
|
-
puts a
|
51
|
-
puts "=" * 100
|
52
|
-
end
|