yasuri 2.0.13 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +35 -0
- data/.ruby-version +1 -1
- data/README.md +15 -7
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +6 -5
- data/lib/yasuri/yasuri_node_generator.rb +7 -9
- data/lib/yasuri/yasuri_text_node.rb +4 -1
- data/spec/spec_helper.rb +0 -5
- data/spec/yasuri_links_node_spec.rb +12 -4
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7f360d6efb02954a5a54e2fc308d0cd0c2e5c129c52eba727fb0dfe4a40ce502
|
4
|
+
data.tar.gz: 8d8805a55c7ce16c76eb50945b954ad19327a3a63183eca098dac6ac93d2203b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffe02aee78de5f30f1e583b2aca8c0617324bdbf62d7c64e371e90d139bac8b1d26df23e9725df0b81b946c6a465283f88a7d51945872c56e7be892eac1b5e4e
|
7
|
+
data.tar.gz: c8983dc2cd283c7de0d97357d2a8164426ee3e1017e73c498c0676716a1c9ab4c42cc02a836bf7e559877d50ca23df6fa656c0197b5018a4881997e2fb4c57d0
|
data/.github/workflows/ruby.yml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
# Yasuri
|
1
|
+
# Yasuri
|
2
|
+
[Build Status](https://travis-ci.org/tac0x2a/yasuri) [Coverage Status](https://coveralls.io/r/tac0x2a/yasuri?branch=master) [Maintainability](https://codeclimate.com/github/tac0x2a/yasuri/maintainability)
|
2
3
|
|
3
4
|
Yasuri (鑢) is an easy web-scraping library for supporting "[Mechanize](https://github.com/sparklemotion/mechanize)".
|
4
5
|
|
@@ -59,12 +60,8 @@ root:
|
|
59
60
|
node: links
|
60
61
|
path: "//*[@id='menu']/ul/li/a"
|
61
62
|
children:
|
62
|
-
- title:
|
63
|
-
|
64
|
-
path: "//*[@id='contents']/h2"
|
65
|
-
- content:
|
66
|
-
node: text
|
67
|
-
path: "//*[@id='contents']/p[1]"
|
63
|
+
- title: { node: text, path: "//*[@id='contents']/h2" }
|
64
|
+
- content: { node: text, path: "//*[@id='contents']/p[1]" }
|
68
65
|
EOYAML
|
69
66
|
root = Yasuri.yaml2tree(src)
|
70
67
|
|
@@ -95,6 +92,17 @@ result = root.inject(agent, root_page)
|
|
95
92
|
# => [ {"title" => "PageTitle", "content" => "Page Contents" }, ... ]
|
96
93
|
```
|
97
94
|
|
95
|
+
## Dev
|
96
|
+
```sh
|
97
|
+
$ gem install bundler
|
98
|
+
$ bundle install
|
99
|
+
```
|
100
|
+
### Test
|
101
|
+
```sh
|
102
|
+
$ rake
|
103
|
+
# or
|
104
|
+
$ rspec spec/*spec.rb
|
105
|
+
```
|
98
106
|
|
99
107
|
## Contributing
|
100
108
|
|
data/lib/yasuri/version.rb
CHANGED
data/lib/yasuri/yasuri.rb
CHANGED
@@ -54,9 +54,9 @@ module Yasuri
|
|
54
54
|
body
|
55
55
|
end
|
56
56
|
|
57
|
-
def self.method_missing(
|
58
|
-
generated = Yasuri::NodeGenerator.gen(
|
59
|
-
generated || super(
|
57
|
+
def self.method_missing(node_name, pattern, **opt, &block)
|
58
|
+
generated = Yasuri::NodeGenerator.gen(node_name, pattern, **opt, &block)
|
59
|
+
generated || super(node_name, **opt)
|
60
60
|
end
|
61
61
|
|
62
62
|
private
|
@@ -85,7 +85,7 @@ module Yasuri
|
|
85
85
|
|
86
86
|
klass = Text2Node[node.to_sym]
|
87
87
|
fail "Undefined node type #{node}" if klass.nil?
|
88
|
-
klass.new(path, name, childnodes, opt)
|
88
|
+
klass.new(path, name, childnodes, **opt)
|
89
89
|
end
|
90
90
|
|
91
91
|
def self.node2hash(node)
|
@@ -109,7 +109,8 @@ module Yasuri
|
|
109
109
|
json
|
110
110
|
end
|
111
111
|
|
112
|
-
def self.NodeName(name,
|
112
|
+
def self.NodeName(name, opt)
|
113
|
+
symbolize_names = opt[:symbolize_names]
|
113
114
|
symbolize_names ? name.to_sym : name
|
114
115
|
end
|
115
116
|
|
data/lib/yasuri/yasuri_node_generator.rb
CHANGED
@@ -15,26 +15,24 @@ module Yasuri
|
|
15
15
|
@nodes
|
16
16
|
end
|
17
17
|
|
18
|
-
def method_missing(name,
|
19
|
-
node = NodeGenerator.gen(name,
|
18
|
+
def method_missing(name, pattern, **args, &block)
|
19
|
+
node = NodeGenerator.gen(name, pattern, **args, &block)
|
20
20
|
raise "Undefined Node Name '#{name}'" if node == nil
|
21
21
|
@nodes << node
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.gen(name,
|
25
|
-
xpath, opt = *args
|
26
|
-
opt = [opt].flatten.compact
|
24
|
+
def self.gen(name, xpath, **opt, &block)
|
27
25
|
children = Yasuri::NodeGenerator.new.gen_recursive(&block) if block_given?
|
28
26
|
|
29
27
|
case name
|
30
28
|
when /^text_(.+)$/
|
31
|
-
Yasuri::TextNode.new(xpath, $1, children || [],
|
29
|
+
Yasuri::TextNode.new(xpath, $1, children || [], **opt)
|
32
30
|
when /^struct_(.+)$/
|
33
|
-
Yasuri::StructNode.new(xpath, $1, children || [],
|
31
|
+
Yasuri::StructNode.new(xpath, $1, children || [], **opt)
|
34
32
|
when /^links_(.+)$/
|
35
|
-
Yasuri::LinksNode.new(xpath, $1, children || [],
|
33
|
+
Yasuri::LinksNode.new(xpath, $1, children || [], **opt)
|
36
34
|
when /^pages_(.+)$/
|
37
|
-
Yasuri::PaginateNode.new(xpath, $1, children || [],
|
35
|
+
Yasuri::PaginateNode.new(xpath, $1, children || [], **opt)
|
38
36
|
else
|
39
37
|
nil
|
40
38
|
end
|
data/lib/yasuri/yasuri_text_node.rb
CHANGED
@@ -7,9 +7,12 @@ module Yasuri
|
|
7
7
|
class TextNode
|
8
8
|
include Node
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [],
|
10
|
+
def initialize(xpath, name, children = [], **opt)
|
11
11
|
super(xpath, name, children)
|
12
12
|
|
13
|
+
truncate = opt[:truncate]
|
14
|
+
proc = opt[:proc]
|
15
|
+
|
13
16
|
truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
|
14
17
|
@truncate = truncate
|
15
18
|
@truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
|
data/spec/spec_helper.rb
CHANGED
@@ -12,11 +12,6 @@ shared_context 'httpserver' do
|
|
12
12
|
}
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
# ENV['CODECLIMATE_REPO_TOKEN'] = "0dc78d33107a7f11f257c0218ac1a37e0073005bb9734f2fd61d0f7e803fc151"
|
17
|
-
# require "codeclimate-test-reporter"
|
18
|
-
# CodeClimate::TestReporter.start
|
19
|
-
|
20
15
|
require 'simplecov'
|
21
16
|
require 'coveralls'
|
22
17
|
Coveralls.wear!
|
data/spec/yasuri_links_node_spec.rb
CHANGED
@@ -59,10 +59,18 @@ describe 'Yasuri' do
|
|
59
59
|
]
|
60
60
|
expect(actual).to match expected
|
61
61
|
end
|
62
|
-
it 'can be defined by DSL, return
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
it 'can be defined by DSL, return no contains if no child node' do
|
63
|
+
root_node = Yasuri.links_title '/html/body/a'
|
64
|
+
actual = root_node.inject(@agent, @index_page)
|
65
|
+
expected = [{}, {}, {}] # Empty if no child node under links node.
|
66
|
+
expect(actual).to match expected
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'can be defined return no contains if no child node' do
|
70
|
+
root_node = Yasuri::LinksNode.new('/html/body/a', "title")
|
71
|
+
actual = root_node.inject(@agent, @index_page)
|
72
|
+
expected = [{}, {}, {}] # Empty if no child node under links node.
|
73
|
+
expect(actual).to match expected
|
66
74
|
end
|
67
75
|
it 'can be defined by DSL, return nested contents under link' do
|
68
76
|
generated = Yasuri.links_title '/html/body/a' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -144,6 +144,7 @@ extensions: []
|
|
144
144
|
extra_rdoc_files: []
|
145
145
|
files:
|
146
146
|
- ".coveralls.yml"
|
147
|
+
- ".github/workflows/ruby.yml"
|
147
148
|
- ".gitignore"
|
148
149
|
- ".rspec"
|
149
150
|
- ".ruby-version"
|
@@ -190,7 +191,7 @@ homepage: https://github.com/tac0x2a/yasuri
|
|
190
191
|
licenses:
|
191
192
|
- MIT
|
192
193
|
metadata: {}
|
193
|
-
post_install_message:
|
194
|
+
post_install_message:
|
194
195
|
rdoc_options: []
|
195
196
|
require_paths:
|
196
197
|
- lib
|
@@ -205,9 +206,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
205
206
|
- !ruby/object:Gem::Version
|
206
207
|
version: '0'
|
207
208
|
requirements: []
|
208
|
-
|
209
|
-
|
210
|
-
signing_key:
|
209
|
+
rubygems_version: 3.2.3
|
210
|
+
signing_key:
|
211
211
|
specification_version: 4
|
212
212
|
summary: Yasuri is easy scraping library.
|
213
213
|
test_files:
|