mechanizer 1.11 → 1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -10
- data/Rakefile +3 -3
- data/lib/mechanizer/noko.rb +4 -4
- data/lib/mechanizer/version.rb +1 -1
- data/mechanizer.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf5e0126c5a9c8e0dbf5eeb4a99e2c0f241ce95348abbdd0b70ce646aefe8063
|
4
|
+
data.tar.gz: 1fdd68e34f4abec9a7f97eb1fd1b71b74bdee20c4915c0157ca013110e67dc94
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5848e452bd1054800b47265699979c87d6dd44300d9f4614dd9f4181933365149e9e6e05906ba19f044db155098e600a81d7842b4996a1d2722da2d32bc271c5
|
7
|
+
data.tar.gz: 8061245ddedef9f9cc718808a541797f271ae45f12d944e86792e12f9c5cf97306cbfaa1d87f97d5c5f194ccdaa306854528a7dfb77cb138e14e6edc82a21a31
|
data/README.md
CHANGED
@@ -58,20 +58,20 @@ noko_hash = noko.scrape(args)
|
|
58
58
|
```
|
59
59
|
err_msg = noko_hash[:err_msg]
|
60
60
|
page = noko_hash[:page]
|
61
|
-
|
61
|
+
texts_and_paths = noko_hash[:texts_and_paths]
|
62
62
|
```
|
63
63
|
|
64
|
-
### 4. Example Texts &
|
64
|
+
### 4. Example Texts & paths:
|
65
65
|
|
66
66
|
```
|
67
|
-
|
68
|
-
{:text=>"english 5 675 000+ articles", :
|
69
|
-
{:text=>"日本語 1 111 000+ 記事", :
|
70
|
-
{:text=>"español 1 427 000+ artículos", :
|
71
|
-
{:text=>"deutsch 2 195 000+ artikel", :
|
72
|
-
{:text=>"русский 1 481 000+ статей", :
|
73
|
-
{:text=>"français 1 997 000+ articles", :
|
74
|
-
{:text=>"italiano 1 446 000+ voci", :
|
67
|
+
texts_and_paths = [
|
68
|
+
{:text=>"english 5 675 000+ articles", :path=>"//en.wikipedia.org/"},
|
69
|
+
{:text=>"日本語 1 111 000+ 記事", :path=>"//ja.wikipedia.org/"},
|
70
|
+
{:text=>"español 1 427 000+ artículos", :path=>"//es.wikipedia.org/"},
|
71
|
+
{:text=>"deutsch 2 195 000+ artikel", :path=>"//de.wikipedia.org/"},
|
72
|
+
{:text=>"русский 1 481 000+ статей", :path=>"//ru.wikipedia.org/"},
|
73
|
+
{:text=>"français 1 997 000+ articles", :path=>"//fr.wikipedia.org/"},
|
74
|
+
{:text=>"italiano 1 446 000+ voci", :path=>"//it.wikipedia.org/"}
|
75
75
|
]
|
76
76
|
```
|
77
77
|
|
data/Rakefile
CHANGED
@@ -16,8 +16,8 @@ task :console do
|
|
16
16
|
require "active_support/all"
|
17
17
|
ARGV.clear
|
18
18
|
|
19
|
-
noko_page_hash = run_mechanizer
|
20
|
-
binding.pry
|
19
|
+
# noko_page_hash = run_mechanizer
|
20
|
+
# binding.pry
|
21
21
|
|
22
22
|
IRB.start
|
23
23
|
end
|
@@ -31,7 +31,7 @@ def run_mechanizer
|
|
31
31
|
|
32
32
|
err_msg = noko_hash[:err_msg]
|
33
33
|
page = noko_hash[:page]
|
34
|
-
|
34
|
+
texts_and_paths = noko_hash[:texts_and_paths]
|
35
35
|
|
36
36
|
other_projects = page.css('.other-project')&.text
|
37
37
|
other_projects = other_projects.split("\n").reject(&:blank?)
|
data/lib/mechanizer/noko.rb
CHANGED
@@ -18,7 +18,7 @@ module Mechanizer
|
|
18
18
|
def scrape(args)
|
19
19
|
@timeout = args.fetch(:timeout, 60)
|
20
20
|
url = args.fetch(:url)
|
21
|
-
noko_hash = { url: url, err_msg: nil,
|
21
|
+
noko_hash = { url: url, err_msg: nil, texts_and_paths: {}, page: nil }
|
22
22
|
noko_hash = start_noko(noko_hash)
|
23
23
|
noko_hash = extract_links(noko_hash)
|
24
24
|
noko_hash
|
@@ -27,10 +27,10 @@ module Mechanizer
|
|
27
27
|
def extract_links(noko_hash)
|
28
28
|
links = noko_hash[:page]&.links
|
29
29
|
unless noko_hash[:err_msg].present? || !links.present?
|
30
|
-
noko_hash[:
|
30
|
+
noko_hash[:texts_and_paths] = links.map do |link|
|
31
31
|
text = link.text&.downcase&.gsub(/\s+/, ' ')&.strip
|
32
|
-
|
33
|
-
|
32
|
+
path = link&.href&.downcase&.strip
|
33
|
+
text_and_path = {text: text, path: path}
|
34
34
|
end
|
35
35
|
end
|
36
36
|
noko_hash
|
data/lib/mechanizer/version.rb
CHANGED
data/mechanizer.gemspec
CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.required_ruby_version = '~> 2.5.1'
|
34
34
|
spec.add_dependency 'activesupport', '~> 5.2'
|
35
35
|
spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
36
|
-
spec.add_dependency 'crm_formatter', '~> 2.
|
36
|
+
spec.add_dependency 'crm_formatter', '~> 2.64'
|
37
37
|
spec.add_dependency 'mechanize', '~> 2.7', '>= 2.7.6'
|
38
38
|
|
39
39
|
# spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mechanizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.11'
|
4
|
+
version: '1.12'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Booth
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '2.
|
47
|
+
version: '2.64'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '2.
|
54
|
+
version: '2.64'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: mechanize
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|