yasuri 2.0.13 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +35 -0
- data/.ruby-version +1 -1
- data/README.md +15 -7
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +6 -5
- data/lib/yasuri/yasuri_node_generator.rb +7 -9
- data/lib/yasuri/yasuri_text_node.rb +4 -1
- data/spec/spec_helper.rb +0 -5
- data/spec/yasuri_links_node_spec.rb +12 -4
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7f360d6efb02954a5a54e2fc308d0cd0c2e5c129c52eba727fb0dfe4a40ce502
|
4
|
+
data.tar.gz: 8d8805a55c7ce16c76eb50945b954ad19327a3a63183eca098dac6ac93d2203b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffe02aee78de5f30f1e583b2aca8c0617324bdbf62d7c64e371e90d139bac8b1d26df23e9725df0b81b946c6a465283f88a7d51945872c56e7be892eac1b5e4e
|
7
|
+
data.tar.gz: c8983dc2cd283c7de0d97357d2a8164426ee3e1017e73c498c0676716a1c9ab4c42cc02a836bf7e559877d50ca23df6fa656c0197b5018a4881997e2fb4c57d0
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
# Yasuri
|
1
|
+
# Yasuri
|
2
|
+
[![Build Status](https://travis-ci.org/tac0x2a/yasuri.svg?branch=master)](https://travis-ci.org/tac0x2a/yasuri) [![Coverage Status](https://coveralls.io/repos/tac0x2a/yasuri/badge.svg?branch=master)](https://coveralls.io/r/tac0x2a/yasuri?branch=master) [![Maintainability](https://api.codeclimate.com/v1/badges/c29480fea1305afe999f/maintainability)](https://codeclimate.com/github/tac0x2a/yasuri/maintainability)
|
2
3
|
|
3
4
|
Yasuri (鑢) is an easy-to-use web-scraping library designed to support "[Mechanize](https://github.com/sparklemotion/mechanize)".
|
4
5
|
|
@@ -59,12 +60,8 @@ root:
|
|
59
60
|
node: links
|
60
61
|
path: "//*[@id='menu']/ul/li/a"
|
61
62
|
children:
|
62
|
-
- title:
|
63
|
-
|
64
|
-
path: "//*[@id='contents']/h2"
|
65
|
-
- content:
|
66
|
-
node: text
|
67
|
-
path: "//*[@id='contents']/p[1]"
|
63
|
+
- title: { node: text, path: "//*[@id='contents']/h2" }
|
64
|
+
- content: { node: text, path: "//*[@id='contents']/p[1]" }
|
68
65
|
EOYAML
|
69
66
|
root = Yasuri.yaml2tree(src)
|
70
67
|
|
@@ -95,6 +92,17 @@ result = root.inject(agent, root_page)
|
|
95
92
|
# => [ {"title" => "PageTitle", "content" => "Page Contents" }, ... ]
|
96
93
|
```
|
97
94
|
|
95
|
+
## Dev
|
96
|
+
```sh
|
97
|
+
$ gem install bundler
|
98
|
+
$ bundle install
|
99
|
+
```
|
100
|
+
### Test
|
101
|
+
```sh
|
102
|
+
$ rake
|
103
|
+
# or
|
104
|
+
$ rspec spec/*spec.rb
|
105
|
+
```
|
98
106
|
|
99
107
|
## Contributing
|
100
108
|
|
data/lib/yasuri/version.rb
CHANGED
data/lib/yasuri/yasuri.rb
CHANGED
@@ -54,9 +54,9 @@ module Yasuri
|
|
54
54
|
body
|
55
55
|
end
|
56
56
|
|
57
|
-
def self.method_missing(
|
58
|
-
generated = Yasuri::NodeGenerator.gen(
|
59
|
-
generated || super(
|
57
|
+
def self.method_missing(node_name, pattern, **opt, &block)
|
58
|
+
generated = Yasuri::NodeGenerator.gen(node_name, pattern, **opt, &block)
|
59
|
+
generated || super(node_name, **opt)
|
60
60
|
end
|
61
61
|
|
62
62
|
private
|
@@ -85,7 +85,7 @@ module Yasuri
|
|
85
85
|
|
86
86
|
klass = Text2Node[node.to_sym]
|
87
87
|
fail "Undefined node type #{node}" if klass.nil?
|
88
|
-
klass.new(path, name, childnodes, opt)
|
88
|
+
klass.new(path, name, childnodes, **opt)
|
89
89
|
end
|
90
90
|
|
91
91
|
def self.node2hash(node)
|
@@ -109,7 +109,8 @@ module Yasuri
|
|
109
109
|
json
|
110
110
|
end
|
111
111
|
|
112
|
-
def self.NodeName(name,
|
112
|
+
def self.NodeName(name, opt)
|
113
|
+
symbolize_names = opt[:symbolize_names]
|
113
114
|
symbolize_names ? name.to_sym : name
|
114
115
|
end
|
115
116
|
|
@@ -15,26 +15,24 @@ module Yasuri
|
|
15
15
|
@nodes
|
16
16
|
end
|
17
17
|
|
18
|
-
def method_missing(name,
|
19
|
-
node = NodeGenerator.gen(name,
|
18
|
+
def method_missing(name, pattern, **args, &block)
|
19
|
+
node = NodeGenerator.gen(name, pattern, **args, &block)
|
20
20
|
raise "Undefined Node Name '#{name}'" if node == nil
|
21
21
|
@nodes << node
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.gen(name,
|
25
|
-
xpath, opt = *args
|
26
|
-
opt = [opt].flatten.compact
|
24
|
+
def self.gen(name, xpath, **opt, &block)
|
27
25
|
children = Yasuri::NodeGenerator.new.gen_recursive(&block) if block_given?
|
28
26
|
|
29
27
|
case name
|
30
28
|
when /^text_(.+)$/
|
31
|
-
Yasuri::TextNode.new(xpath, $1, children || [],
|
29
|
+
Yasuri::TextNode.new(xpath, $1, children || [], **opt)
|
32
30
|
when /^struct_(.+)$/
|
33
|
-
Yasuri::StructNode.new(xpath, $1, children || [],
|
31
|
+
Yasuri::StructNode.new(xpath, $1, children || [], **opt)
|
34
32
|
when /^links_(.+)$/
|
35
|
-
Yasuri::LinksNode.new(xpath, $1, children || [],
|
33
|
+
Yasuri::LinksNode.new(xpath, $1, children || [], **opt)
|
36
34
|
when /^pages_(.+)$/
|
37
|
-
Yasuri::PaginateNode.new(xpath, $1, children || [],
|
35
|
+
Yasuri::PaginateNode.new(xpath, $1, children || [], **opt)
|
38
36
|
else
|
39
37
|
nil
|
40
38
|
end
|
@@ -7,9 +7,12 @@ module Yasuri
|
|
7
7
|
class TextNode
|
8
8
|
include Node
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [],
|
10
|
+
def initialize(xpath, name, children = [], **opt)
|
11
11
|
super(xpath, name, children)
|
12
12
|
|
13
|
+
truncate = opt[:truncate]
|
14
|
+
proc = opt[:proc]
|
15
|
+
|
13
16
|
truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
|
14
17
|
@truncate = truncate
|
15
18
|
@truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
|
data/spec/spec_helper.rb
CHANGED
@@ -12,11 +12,6 @@ shared_context 'httpserver' do
|
|
12
12
|
}
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
# ENV['CODECLIMATE_REPO_TOKEN'] = "0dc78d33107a7f11f257c0218ac1a37e0073005bb9734f2fd61d0f7e803fc151"
|
17
|
-
# require "codeclimate-test-reporter"
|
18
|
-
# CodeClimate::TestReporter.start
|
19
|
-
|
20
15
|
require 'simplecov'
|
21
16
|
require 'coveralls'
|
22
17
|
Coveralls.wear!
|
@@ -59,10 +59,18 @@ describe 'Yasuri' do
|
|
59
59
|
]
|
60
60
|
expect(actual).to match expected
|
61
61
|
end
|
62
|
-
it 'can be defined by DSL, return
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
it 'can be defined by DSL, return no contains if no child node' do
|
63
|
+
root_node = Yasuri.links_title '/html/body/a'
|
64
|
+
actual = root_node.inject(@agent, @index_page)
|
65
|
+
expected = [{}, {}, {}] # Empty if no child node under links node.
|
66
|
+
expect(actual).to match expected
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'can be defined return no contains if no child node' do
|
70
|
+
root_node = Yasuri::LinksNode.new('/html/body/a', "title")
|
71
|
+
actual = root_node.inject(@agent, @index_page)
|
72
|
+
expected = [{}, {}, {}] # Empty if no child node under links node.
|
73
|
+
expect(actual).to match expected
|
66
74
|
end
|
67
75
|
it 'can be defined by DSL, return nested contents under link' do
|
68
76
|
generated = Yasuri.links_title '/html/body/a' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -144,6 +144,7 @@ extensions: []
|
|
144
144
|
extra_rdoc_files: []
|
145
145
|
files:
|
146
146
|
- ".coveralls.yml"
|
147
|
+
- ".github/workflows/ruby.yml"
|
147
148
|
- ".gitignore"
|
148
149
|
- ".rspec"
|
149
150
|
- ".ruby-version"
|
@@ -190,7 +191,7 @@ homepage: https://github.com/tac0x2a/yasuri
|
|
190
191
|
licenses:
|
191
192
|
- MIT
|
192
193
|
metadata: {}
|
193
|
-
post_install_message:
|
194
|
+
post_install_message:
|
194
195
|
rdoc_options: []
|
195
196
|
require_paths:
|
196
197
|
- lib
|
@@ -205,9 +206,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
205
206
|
- !ruby/object:Gem::Version
|
206
207
|
version: '0'
|
207
208
|
requirements: []
|
208
|
-
|
209
|
-
|
210
|
-
signing_key:
|
209
|
+
rubygems_version: 3.2.3
|
210
|
+
signing_key:
|
211
211
|
specification_version: 4
|
212
212
|
summary: Yasuri is easy scraping library.
|
213
213
|
test_files:
|