gitscraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README +3 -0
  2. data/src/scraper.rb +35 -0
  3. data/src/search_url.rb +20 -0
  4. metadata +47 -0
data/README ADDED
@@ -0,0 +1,3 @@
1
+ See the examples directory for how to use this code.
2
+
3
+ author: kiwi.swhite.coder@gmail.com
data/src/scraper.rb ADDED
@@ -0,0 +1,35 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
# Scrapes paginated search-result pages and collects the link targets found
# on them.
#
# The object handed to the constructor must respond to +search_url+ and
# +single_page_url(page_index)+, each returning a location that can be
# opened (normally an HTTP(S) URL string).
class Scraper
  # @param url [#search_url, #single_page_url] provider of the page locations
  def initialize(url)
    @url = url
  end

  # Total number of results announced by the search page.
  #
  # Takes the first parenthesised integer (for example "(123)") found in the
  # combined text of the <div class="title"> elements.
  #
  # @return [Integer] the announced count, or 0 when no such number is present
  def element_number
    title_text = fetch_page(@url.search_url).css("div[class=title]").text
    title_text[/\((\d+)\)/, 1].to_i
  end

  # Link targets listed on one result page.
  #
  # @param page_index [Integer] page to fetch, passed to +single_page_url+
  # @return [Array<String>] the href of every <a> inside <h2 class="title">,
  #   with its first character removed (presumably the leading "/" - verify
  #   against the live markup)
  def single_page_elements(page_index)
    links = fetch_page(@url.single_page_url(page_index)).css("h2[class=title]").css("a")
    links.map { |link| link["href"][1..-1] }
  end

  # Number of result pages needed to hold +element_number+ results.
  #
  # @return [Integer] element count divided by page size, rounded up
  def page_number
    element_number.fdiv(page_size).ceil
  end

  # @return [Integer] number of results assumed per page
  def page_size
    30
  end

  # Link targets from every result page, in page order.
  #
  # @return [Array<String>] concatenation of +single_page_elements+ for
  #   pages 1 through +page_number+ (empty when there are no pages)
  def all_elements
    (1..page_number).flat_map { |page_index| single_page_elements(page_index) }
  end

  private

  # Opens +location+, parses it as HTML and closes the handle afterwards.
  #
  # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used when
  # open-uri provides it; older Rubies fall back to open-uri's patched
  # Kernel.open.
  def fetch_page(location)
    opener = URI.respond_to?(:open) ? URI : Kernel
    opener.open(location) { |io| Nokogiri::HTML(io) }
  end
end
35
+
data/src/search_url.rb ADDED
@@ -0,0 +1,20 @@
1
# Builds github.com search URLs from a result type and a set of search
# qualifiers.
class SearchURL
  # @param type  [#to_s] result type; its capitalized string form becomes
  #   the +type+ query parameter (e.g. :repositories -> "Repositories")
  # @param param [#to_a] qualifier pairs, typically a Hash such as
  #   { :language => "ruby" }; nil is treated as no qualifiers
  def initialize(type, param)
    @type = type
    @param = param
  end

  # @return [String] URL of the search results for the configured criteria
  def search_url
    "https://github.com/search?&q=#{criteria}&type=#{@type.to_s.capitalize}"
  end

  # @param page_index [#to_s] value for the +start_value+ query parameter
  # @return [String] +search_url+ with the +start_value+ parameter appended
  def single_page_url(page_index)
    "#{search_url}&start_value=#{page_index}"
  end

  # Query-string form of the qualifiers: each pair is joined with "%3A"
  # (an encoded ":") and the pairs are joined with "+".
  #
  # Keys and values are inserted verbatim - nothing is URL-escaped here, so
  # callers must pass values that are already safe to place in a URL.
  #
  # @return [String] e.g. "language%3Aruby+stars%3A10"; "" when there are no
  #   qualifiers
  def criteria
    @param.to_a.map { |pair| pair.join("%3A") }.join('+')
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gitscraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - kiwi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-31 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email: kiwi.swhite.coder@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README
21
+ - src/scraper.rb
22
+ - src/search_url.rb
23
+ homepage: https://github.com/kiwiwin/GitScraper
24
+ licenses: []
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - src
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 1.8.24
44
+ signing_key:
45
+ specification_version: 3
46
+ summary: scrape github stuff
47
+ test_files: []