ginspider 0.1.1 → 0.1.2
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each version as they appear in the respective public registry.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/ginspider/analysis.rb +40 -5
- data/lib/ginspider/main.rb +27 -9
- data/lib/ginspider/version.rb +1 -1
- data/lib/ginspider.rb +26 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24178b5f71bc5c092364a9066975e62e453cebd8
|
4
|
+
data.tar.gz: 6bfd4f30b2ac4146f3c99ff056991be04d3a02e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18be42fc8f3ef9cf67231039228aa42d0a47998396a8b65da21b943b32436ef847b29721d32d11eb2730ca11d6e6b09ecc36420188fcbcd575e5c8420a6c0cea
|
7
|
+
data.tar.gz: a36c97cbb54a850be74b2a569ce2ca55e268cf28339f82a263361ab94a65d2b198d3fd5c4cc26b52b4a8e3b56b1e1320cee775bc0e201ea44bfe8557bf828013
|
data/README.md
CHANGED
data/lib/ginspider/analysis.rb
CHANGED
@@ -9,18 +9,53 @@ module Ginspider
|
|
9
9
|
# return nokogiri response
|
10
10
|
#
|
11
11
|
def self.noko_res(url)
|
12
|
-
res_body =
|
13
|
-
|
14
|
-
return
|
12
|
+
res_body = get_res_body(url)
|
13
|
+
noko_res = get_noko_res(res_body)
|
14
|
+
return noko_res
|
15
|
+
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# return links of response with url and options
|
19
|
+
#
|
20
|
+
def self.res_links(url, options = {})
|
21
|
+
res_body = get_res_body(url)
|
22
|
+
res_links = get_res_links(res_body, options)
|
23
|
+
return res_links
|
15
24
|
end
|
16
25
|
|
17
26
|
private
|
27
|
+
|
28
|
+
#
|
29
|
+
# get the response body
|
30
|
+
#
|
31
|
+
def self.get_res_body(url)
|
32
|
+
@res_body = Http.res_body(url)
|
33
|
+
return @res_body
|
34
|
+
end
|
35
|
+
|
18
36
|
#
|
19
37
|
# generate a nokogiri response
|
20
38
|
#
|
21
39
|
def self.get_noko_res(res_body)
|
22
|
-
noko_res = Nokogiri::HTML(res_body.to_s)
|
23
|
-
return noko_res
|
40
|
+
@noko_res = Nokogiri::HTML(res_body.to_s)
|
41
|
+
return @noko_res
|
42
|
+
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# get all the links from nokogiri object
|
46
|
+
#
|
47
|
+
def self.get_res_links(res_body, options = {})
|
48
|
+
@noko_res = get_noko_res(res_body)
|
49
|
+
@links = @noko_res.css(options["element"])
|
50
|
+
|
51
|
+
links = []
|
52
|
+
i = 0
|
53
|
+
@links.each do |link|
|
54
|
+
links[i] = link["href"].strip
|
55
|
+
i = i + 1
|
56
|
+
end
|
57
|
+
|
58
|
+
return links
|
24
59
|
end
|
25
60
|
|
26
61
|
end
|
data/lib/ginspider/main.rb
CHANGED
@@ -5,21 +5,39 @@ require 'ginspider/analysis'
|
|
5
5
|
|
6
6
|
module Ginspider
|
7
7
|
|
8
|
-
#
|
9
|
-
# the main method to crawl a website
|
10
|
-
#
|
11
|
-
def Ginspider.crawl(url, options = {})
|
12
|
-
Main.crawl(url, options)
|
13
|
-
end
|
14
|
-
|
15
8
|
class Main
|
16
9
|
|
17
10
|
#
|
18
|
-
#
|
11
|
+
# get the basic response
|
19
12
|
#
|
20
13
|
def self.crawl(url, options = {})
|
14
|
+
basic_res = basic_res(url)
|
15
|
+
ap basic_res
|
16
|
+
end
|
17
|
+
|
18
|
+
# define the main crawler
|
19
|
+
#
|
20
|
+
def self.crawl_with_noko(url, options = {})
|
21
21
|
noko_res = Analysis.noko_res(url)
|
22
|
-
ap noko_res
|
22
|
+
ap noko_res
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# define the main method to get all links
|
27
|
+
#
|
28
|
+
def self.links_of(url, options = {})
|
29
|
+
res_links = Analysis.res_links(url, options)
|
30
|
+
ap res_links
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
#
|
36
|
+
# get the basic response
|
37
|
+
#
|
38
|
+
def self.basic_res(url, options = {})
|
39
|
+
@basic_res = Http.basic_res(url)
|
40
|
+
return @basic_res
|
23
41
|
end
|
24
42
|
|
25
43
|
end
|
data/lib/ginspider/version.rb
CHANGED
data/lib/ginspider.rb
CHANGED
@@ -1,2 +1,27 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'ginspider/main'
|
2
|
+
require 'ginspider/main'
|
3
|
+
|
4
|
+
module Ginspider
|
5
|
+
|
6
|
+
#
|
7
|
+
# the api method to crawl a website, and get the basic response
|
8
|
+
#
|
9
|
+
def Ginspider.crawl(url, options = {})
|
10
|
+
Main.crawl(url, options)
|
11
|
+
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# the api method to get all links with url and options
|
15
|
+
#
|
16
|
+
def Ginspider.links_of(url, options = {})
|
17
|
+
Main.links_of(url, options)
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
# the api method to get text with url and options
|
22
|
+
#
|
23
|
+
def Ginspider.text_of(url, options = {})
|
24
|
+
Main.text_of(url, options)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ginspider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- lancegin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
version: '0'
|
140
140
|
requirements: []
|
141
141
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
142
|
+
rubygems_version: 2.5.1
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: to analyze some websites
|