gitscraper 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +3 -0
- data/src/scraper.rb +35 -0
- data/src/search_url.rb +20 -0
- metadata +47 -0
data/README
ADDED
data/src/scraper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Scrapes paginated search results using a URL builder object.
#
# The object passed to +new+ must respond to +search_url+ and
# +single_page_url(page_index)+, each returning a URL string.
class Scraper
  # @param url [#search_url, #single_page_url] URL builder for the search
  def initialize(url)
    @url = url
  end

  # Total number of results reported on the first search page.
  #
  # Takes the first parenthesised integer found in the text of the
  # <tt>div[class=title]</tt> nodes.
  #
  # @return [Integer] the parsed count, or 0 when no "(N)" is present
  def element_number
    title_text = fetch_page(@url.search_url).css("div[class=title]").text
    # nil.to_i is 0, so a page without a "(N)" marker yields 0.
    title_text.scan(/\((\d+)\)/).flatten.first.to_i
  end

  # Collects the link targets listed on one result page.
  #
  # @param page_index [Integer] page to fetch, passed to +single_page_url+
  # @return [Array<String>] each anchor's href with its first character
  #   removed (presumably the leading "/" -- confirm against live markup)
  def single_page_elements(page_index)
    anchors = fetch_page(@url.single_page_url(page_index)).css("h2[class=title]").css("a")
    anchors.map { |anchor| anchor["href"][1..-1] }
  end

  # Number of pages needed to hold +element_number+ results.
  #
  # @return [Integer] result count divided by +page_size+, rounded up
  def page_number
    element_number.fdiv(page_size).ceil
  end

  # @return [Integer] number of results assumed per page
  def page_size
    30
  end

  # Fetches every page from 1 to +page_number+ and concatenates the results.
  #
  # @return [Array<String>] all elements, in page order
  def all_elements
    (1..page_number).flat_map { |page_index| single_page_elements(page_index) }
  end

  private

  # Downloads +address+ and parses it as HTML.
  #
  # Uses URI#open (provided by open-uri) instead of Kernel#open: open-uri's
  # Kernel#open patch was removed in Ruby 3.0. The block form closes the
  # stream once parsing is done.
  def fetch_page(address)
    URI.parse(address).open { |io| Nokogiri::HTML(io) }
  end
end
|
35
|
+
|
data/src/search_url.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Builds GitHub search URLs from a result type and a set of search qualifiers.
class SearchURL
  # @param type [#to_s] result type; its capitalized string form is sent as
  #   the +type+ query parameter
  # @param param [#to_a] qualifier/value pairs (for example a Hash); each
  #   pair becomes one "qualifier:value" term of the query
  def initialize(type, param)
    @type = type
    @param = param
  end

  # @return [String] URL of the search
  def search_url
    "https://github.com/search?&q=#{criteria}&type=#{@type.to_s.capitalize}"
  end

  # @param page_index [Integer] value appended as the +start_value+ parameter
  # @return [String] the search URL for one page of results
  def single_page_url(page_index)
    search_url + "&start_value=#{page_index}"
  end

  # Joins each pair with "%3A" (a percent-encoded ":") and the resulting
  # terms with "+".
  #
  # NOTE(review): keys and values are inserted as-is, without
  # percent-escaping -- confirm callers only pass URL-safe values.
  #
  # @return [String] the value of the +q+ query parameter
  def criteria
    @param.to_a.map { |pair| pair.join("%3A") }.join('+')
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
name: gitscraper
version: !ruby/object:Gem::Version
  version: 0.0.1
  prerelease:
platform: ruby
authors:
- kiwi
autorequire:
bindir: bin
cert_chain: []
date: 2012-10-31 00:00:00.000000000 Z
dependencies: []
description:
email: kiwi.swhite.coder@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- README
- src/scraper.rb
- src/search_url.rb
homepage: https://github.com/kiwiwin/GitScraper
licenses: []
post_install_message:
rdoc_options: []
require_paths:
- src
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 1.8.24
signing_key:
specification_version: 3
summary: scrape github stuff
test_files: []
|