link_scraper 1.02 → 2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +1 -4
- data/lib/link_scraper/scrape.rb +0 -1
- data/lib/link_scraper/version.rb +1 -1
- data/link_scraper.gemspec +3 -9
- metadata +6 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f157151d90a4cbcaa975c3ea1fd4860898d0bd3347e9ac68657253e10c27e3ff
|
4
|
+
data.tar.gz: 8e9b320f0a7d2491e46777620e13439b0002d037a596a8339d08f5da4eb93737
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89a161348ff507dc0265ee444eb16616d483aebadfaa7491db73c7b2ee227d8696e5e1aada536e43932aa625f8cc8ccaa2a0f343a0228204118f98140214ad9f
|
7
|
+
data.tar.gz: dce0719315e35776ce7348d57de9128a9104ece9e7404025286195aa6e031724d512811ca67fda627c9376f6a7d8a939f3cce3d00395354851db6421ac63e54f
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ path_criteria = {
|
|
39
39
|
neg_criteria: %w[drought school]
|
40
40
|
}
|
41
41
|
|
42
|
-
scraper = LinkScraper::Scrape.new(text_criteria: text_criteria, path_criteria: path_criteria)
|
42
|
+
scraper = LinkScraper::Scrape.new({text_criteria: text_criteria, path_criteria: path_criteria})
|
43
43
|
scraped_links = scraper.start('https://en.wikipedia.org/wiki/Austin%2C_Texas')
|
44
44
|
```
|
45
45
|
|
data/Rakefile
CHANGED
@@ -18,7 +18,6 @@ task :console do
|
|
18
18
|
ARGV.clear
|
19
19
|
|
20
20
|
scraped_links = run_link_scraper
|
21
|
-
binding.pry
|
22
21
|
|
23
22
|
IRB.start
|
24
23
|
end
|
@@ -36,10 +35,8 @@ def run_link_scraper
|
|
36
35
|
neg_criteria: %w[drought school]
|
37
36
|
}
|
38
37
|
|
39
|
-
scraper = LinkScraper::Scrape.new(text_criteria: text_criteria, path_criteria: path_criteria)
|
38
|
+
scraper = LinkScraper::Scrape.new({text_criteria: text_criteria, path_criteria: path_criteria})
|
40
39
|
scraped_links = scraper.start('https://en.wikipedia.org/wiki/Austin%2C_Texas')
|
41
40
|
|
42
|
-
binding.pry
|
43
|
-
|
44
41
|
# scraper = LinkScraper::Scrape.new(WebsCriteria.all_scrub_web_criteria)
|
45
42
|
end
|
data/lib/link_scraper/scrape.rb
CHANGED
data/lib/link_scraper/version.rb
CHANGED
data/link_scraper.gemspec
CHANGED
@@ -22,12 +22,6 @@ Gem::Specification.new do |spec|
|
|
22
22
|
'public gem pushes.'
|
23
23
|
end
|
24
24
|
|
25
|
-
# Specify which files should be added to the gem when it is released.
|
26
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
27
|
-
# spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
28
|
-
# `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
29
|
-
# end
|
30
|
-
|
31
25
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
32
26
|
f.match(%r{^(test|spec|features)/})
|
33
27
|
end
|
@@ -38,11 +32,11 @@ Gem::Specification.new do |spec|
|
|
38
32
|
|
39
33
|
spec.required_ruby_version = '~> 2.5.1'
|
40
34
|
spec.add_dependency 'activesupport', '~> 5.2'
|
41
|
-
spec.add_dependency 'crm_formatter', '~> 2.
|
35
|
+
spec.add_dependency 'crm_formatter', '~> 2.65'
|
42
36
|
spec.add_dependency 'mechanizer', '~> 1.12'
|
43
|
-
spec.add_dependency 'scrub_db', '~> 2.
|
37
|
+
spec.add_dependency 'scrub_db', '~> 2.24'
|
44
38
|
spec.add_dependency 'url_verifier', '~> 2.12'
|
45
|
-
spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
39
|
+
# spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
46
40
|
|
47
41
|
# spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
|
48
42
|
spec.add_development_dependency 'bundler', '~> 1.16', '>= 1.16.2'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: link_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '
|
4
|
+
version: '2.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Booth
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-07-
|
11
|
+
date: 2018-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '2.
|
33
|
+
version: '2.65'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '2.
|
40
|
+
version: '2.65'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: mechanizer
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '2.
|
61
|
+
version: '2.24'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '2.
|
68
|
+
version: '2.24'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: url_verifier
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '2.12'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: utf8_sanitizer
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '2.16'
|
90
|
-
type: :runtime
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '2.16'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
84
|
name: bundler
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|