ginspider 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ginspider/analysis.rb +22 -2
- data/lib/ginspider/main.rb +11 -3
- data/lib/ginspider/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67ea88896c2c01988dabfbf948162dac258db6c5
|
4
|
+
data.tar.gz: 7ff7aafde36db2369390f18e0396dc823ce88e66
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3df86b4407e8d6cd650b61879c1bc012de712af7c71b576aea345f83f94259f23131385ec106446ca6d5078da0d2e739a0b3a41fd0902aebdb304acc356d8ef2
|
7
|
+
data.tar.gz: 32d47cb4c00f76d41d1d855b7f7cb1297daa27fe1ba3ff6b2837bf41ccad1de62282bf794a74e9e793d323a809e74fb8ec4fdeeca9fc1155bdd6acc41af2abde
|
data/lib/ginspider/analysis.rb
CHANGED
@@ -22,6 +22,15 @@ module Ginspider
|
|
22
22
|
res_links = get_res_links(res_body, options)
|
23
23
|
return res_links
|
24
24
|
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# return text of response with url and options
|
28
|
+
#
|
29
|
+
def self.res_text(url, options = {})
|
30
|
+
res_body = get_res_body(url)
|
31
|
+
res_text = get_res_text(res_body, options)
|
32
|
+
return res_text
|
33
|
+
end
|
25
34
|
|
26
35
|
private
|
27
36
|
|
@@ -46,11 +55,11 @@ module Ginspider
|
|
46
55
|
#
|
47
56
|
def self.get_res_links(res_body, options = {})
|
48
57
|
@noko_res = get_noko_res(res_body)
|
49
|
-
@
|
58
|
+
@noko_targets = @noko_res.css(options["element"])
|
50
59
|
|
51
60
|
links = []
|
52
61
|
i = 0
|
53
|
-
@
|
62
|
+
@noko_targets.each do |link|
|
54
63
|
links[i] = link["href"].strip
|
55
64
|
i = i + 1
|
56
65
|
end
|
@@ -58,6 +67,17 @@ module Ginspider
|
|
58
67
|
return links
|
59
68
|
end
|
60
69
|
|
70
|
+
#
|
71
|
+
# get all the text from nokogiri object
|
72
|
+
#
|
73
|
+
def self.get_res_text(res_body, options = {})
|
74
|
+
@noko_res = get_noko_res(res_body)
|
75
|
+
@noko_target = @noko_res.css(options["element"])
|
76
|
+
text = @noko_target.text
|
77
|
+
|
78
|
+
return text
|
79
|
+
end
|
80
|
+
|
61
81
|
end
|
62
82
|
|
63
83
|
end
|
data/lib/ginspider/main.rb
CHANGED
@@ -12,14 +12,14 @@ module Ginspider
|
|
12
12
|
#
|
13
13
|
def self.crawl(url, options = {})
|
14
14
|
basic_res = basic_res(url)
|
15
|
-
|
15
|
+
return basic_res
|
16
16
|
end
|
17
17
|
|
18
18
|
# define the main crawler
|
19
19
|
#
|
20
20
|
def self.crawl_with_noko(url, options = {})
|
21
21
|
noko_res = Analysis.noko_res(url)
|
22
|
-
|
22
|
+
return noko_res
|
23
23
|
end
|
24
24
|
|
25
25
|
#
|
@@ -27,7 +27,15 @@ module Ginspider
|
|
27
27
|
#
|
28
28
|
def self.links_of(url, options = {})
|
29
29
|
res_links = Analysis.res_links(url, options)
|
30
|
-
|
30
|
+
return res_links
|
31
|
+
end
|
32
|
+
|
33
|
+
#
|
34
|
+
# define the main method to get all texts
|
35
|
+
#
|
36
|
+
def self.text_of(url, options = {})
|
37
|
+
res_text = Analysis.res_text(url, options)
|
38
|
+
return res_text
|
31
39
|
end
|
32
40
|
|
33
41
|
private
|
data/lib/ginspider/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ginspider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- lancegin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
version: '0'
|
140
140
|
requirements: []
|
141
141
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
142
|
+
rubygems_version: 2.6.6
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: to analyze some websites
|