link_scraper 1.02 → 2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +1 -4
- data/lib/link_scraper/scrape.rb +0 -1
- data/lib/link_scraper/version.rb +1 -1
- data/link_scraper.gemspec +3 -9
- metadata +6 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f157151d90a4cbcaa975c3ea1fd4860898d0bd3347e9ac68657253e10c27e3ff
|
4
|
+
data.tar.gz: 8e9b320f0a7d2491e46777620e13439b0002d037a596a8339d08f5da4eb93737
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89a161348ff507dc0265ee444eb16616d483aebadfaa7491db73c7b2ee227d8696e5e1aada536e43932aa625f8cc8ccaa2a0f343a0228204118f98140214ad9f
|
7
|
+
data.tar.gz: dce0719315e35776ce7348d57de9128a9104ece9e7404025286195aa6e031724d512811ca67fda627c9376f6a7d8a939f3cce3d00395354851db6421ac63e54f
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ path_criteria = {
|
|
39
39
|
neg_criteria: %w[drought school]
|
40
40
|
}
|
41
41
|
|
42
|
-
scraper = LinkScraper::Scrape.new(text_criteria: text_criteria, path_criteria: path_criteria)
|
42
|
+
scraper = LinkScraper::Scrape.new({text_criteria: text_criteria, path_criteria: path_criteria})
|
43
43
|
scraped_links = scraper.start('https://en.wikipedia.org/wiki/Austin%2C_Texas')
|
44
44
|
```
|
45
45
|
|
data/Rakefile
CHANGED
@@ -18,7 +18,6 @@ task :console do
|
|
18
18
|
ARGV.clear
|
19
19
|
|
20
20
|
scraped_links = run_link_scraper
|
21
|
-
binding.pry
|
22
21
|
|
23
22
|
IRB.start
|
24
23
|
end
|
@@ -36,10 +35,8 @@ def run_link_scraper
|
|
36
35
|
neg_criteria: %w[drought school]
|
37
36
|
}
|
38
37
|
|
39
|
-
scraper = LinkScraper::Scrape.new(text_criteria: text_criteria, path_criteria: path_criteria)
|
38
|
+
scraper = LinkScraper::Scrape.new({text_criteria: text_criteria, path_criteria: path_criteria})
|
40
39
|
scraped_links = scraper.start('https://en.wikipedia.org/wiki/Austin%2C_Texas')
|
41
40
|
|
42
|
-
binding.pry
|
43
|
-
|
44
41
|
# scraper = LinkScraper::Scrape.new(WebsCriteria.all_scrub_web_criteria)
|
45
42
|
end
|
data/lib/link_scraper/scrape.rb
CHANGED
data/lib/link_scraper/version.rb
CHANGED
data/link_scraper.gemspec
CHANGED
@@ -22,12 +22,6 @@ Gem::Specification.new do |spec|
|
|
22
22
|
'public gem pushes.'
|
23
23
|
end
|
24
24
|
|
25
|
-
# Specify which files should be added to the gem when it is released.
|
26
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
27
|
-
# spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
28
|
-
# `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
29
|
-
# end
|
30
|
-
|
31
25
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
32
26
|
f.match(%r{^(test|spec|features)/})
|
33
27
|
end
|
@@ -38,11 +32,11 @@ Gem::Specification.new do |spec|
|
|
38
32
|
|
39
33
|
spec.required_ruby_version = '~> 2.5.1'
|
40
34
|
spec.add_dependency 'activesupport', '~> 5.2'
|
41
|
-
spec.add_dependency 'crm_formatter', '~> 2.
|
35
|
+
spec.add_dependency 'crm_formatter', '~> 2.65'
|
42
36
|
spec.add_dependency 'mechanizer', '~> 1.12'
|
43
|
-
spec.add_dependency 'scrub_db', '~> 2.
|
37
|
+
spec.add_dependency 'scrub_db', '~> 2.24'
|
44
38
|
spec.add_dependency 'url_verifier', '~> 2.12'
|
45
|
-
spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
39
|
+
# spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
46
40
|
|
47
41
|
# spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
|
48
42
|
spec.add_development_dependency 'bundler', '~> 1.16', '>= 1.16.2'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: link_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '
|
4
|
+
version: '2.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Booth
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-07-
|
11
|
+
date: 2018-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '2.
|
33
|
+
version: '2.65'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '2.
|
40
|
+
version: '2.65'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: mechanizer
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '2.
|
61
|
+
version: '2.24'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '2.
|
68
|
+
version: '2.24'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: url_verifier
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '2.12'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: utf8_sanitizer
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '2.16'
|
90
|
-
type: :runtime
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '2.16'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
84
|
name: bundler
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|