ginspider 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ginspider/analysis.rb +22 -2
- data/lib/ginspider/main.rb +11 -3
- data/lib/ginspider/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67ea88896c2c01988dabfbf948162dac258db6c5
|
4
|
+
data.tar.gz: 7ff7aafde36db2369390f18e0396dc823ce88e66
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3df86b4407e8d6cd650b61879c1bc012de712af7c71b576aea345f83f94259f23131385ec106446ca6d5078da0d2e739a0b3a41fd0902aebdb304acc356d8ef2
|
7
|
+
data.tar.gz: 32d47cb4c00f76d41d1d855b7f7cb1297daa27fe1ba3ff6b2837bf41ccad1de62282bf794a74e9e793d323a809e74fb8ec4fdeeca9fc1155bdd6acc41af2abde
|
data/lib/ginspider/analysis.rb
CHANGED
@@ -22,6 +22,15 @@ module Ginspider
|
|
22
22
|
res_links = get_res_links(res_body, options)
|
23
23
|
return res_links
|
24
24
|
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# return text of response with url and options
|
28
|
+
#
|
29
|
+
def self.res_text(url, options = {})
|
30
|
+
res_body = get_res_body(url)
|
31
|
+
res_text = get_res_text(res_body, options)
|
32
|
+
return res_text
|
33
|
+
end
|
25
34
|
|
26
35
|
private
|
27
36
|
|
@@ -46,11 +55,11 @@ module Ginspider
|
|
46
55
|
#
|
47
56
|
def self.get_res_links(res_body, options = {})
|
48
57
|
@noko_res = get_noko_res(res_body)
|
49
|
-
@
|
58
|
+
@noko_targets = @noko_res.css(options["element"])
|
50
59
|
|
51
60
|
links = []
|
52
61
|
i = 0
|
53
|
-
@
|
62
|
+
@noko_targets.each do |link|
|
54
63
|
links[i] = link["href"].strip
|
55
64
|
i = i + 1
|
56
65
|
end
|
@@ -58,6 +67,17 @@ module Ginspider
|
|
58
67
|
return links
|
59
68
|
end
|
60
69
|
|
70
|
+
#
|
71
|
+
# get all the text from nokogiri object
|
72
|
+
#
|
73
|
+
def self.get_res_text(res_body, options = {})
|
74
|
+
@noko_res = get_noko_res(res_body)
|
75
|
+
@noko_target = @noko_res.css(options["element"])
|
76
|
+
text = @noko_target.text
|
77
|
+
|
78
|
+
return text
|
79
|
+
end
|
80
|
+
|
61
81
|
end
|
62
82
|
|
63
83
|
end
|
data/lib/ginspider/main.rb
CHANGED
@@ -12,14 +12,14 @@ module Ginspider
|
|
12
12
|
#
|
13
13
|
def self.crawl(url, options = {})
|
14
14
|
basic_res = basic_res(url)
|
15
|
-
|
15
|
+
return basic_res
|
16
16
|
end
|
17
17
|
|
18
18
|
# define the main crawler
|
19
19
|
#
|
20
20
|
def self.crawl_with_noko(url, options = {})
|
21
21
|
noko_res = Analysis.noko_res(url)
|
22
|
-
|
22
|
+
return noko_res
|
23
23
|
end
|
24
24
|
|
25
25
|
#
|
@@ -27,7 +27,15 @@ module Ginspider
|
|
27
27
|
#
|
28
28
|
def self.links_of(url, options = {})
|
29
29
|
res_links = Analysis.res_links(url, options)
|
30
|
-
|
30
|
+
return res_links
|
31
|
+
end
|
32
|
+
|
33
|
+
#
|
34
|
+
# define the main method to get all texts
|
35
|
+
#
|
36
|
+
def self.text_of(url, options = {})
|
37
|
+
res_text = Analysis.res_text(url, options)
|
38
|
+
return res_text
|
31
39
|
end
|
32
40
|
|
33
41
|
private
|
data/lib/ginspider/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ginspider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- lancegin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
version: '0'
|
140
140
|
requirements: []
|
141
141
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
142
|
+
rubygems_version: 2.6.6
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: to analyze some websites
|