gitscraper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/README +3 -0
  2. data/src/scraper.rb +35 -0
  3. data/src/search_url.rb +20 -0
  4. metadata +47 -0
data/README ADDED
@@ -0,0 +1,3 @@
1
+ see examples directory for how to use this code
2
+
3
+ author: kiwi.swhite.coder@gmail.com
data/src/scraper.rb ADDED
@@ -0,0 +1,35 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
# Scrapes search result pages and collects the links they list.
#
# The object given to the constructor must respond to +search_url+ and
# +single_page_url(page_index)+, each returning a URL string.
class Scraper
  # Number of results assumed to be listed on every result page.
  PAGE_SIZE = 30

  # @param url [#search_url, #single_page_url] builder for the URLs to fetch
  def initialize(url)
    @url = url
  end

  # Total number of results: the first parenthesised integer found in the
  # text of the <tt>div[class=title]</tt> elements of the search page.
  #
  # @return [Integer] 0 when no such number is present
  def element_number
    title_text = fetch_document(@url.search_url).css("div[class=title]").text
    title_text[/\((\d+)\)/, 1].to_i
  end

  # Link targets listed on one result page, each with its first character
  # (the leading "/" of a site-relative path) removed.
  #
  # @param page_index [Integer] index handed to +single_page_url+
  # @return [Array<String>]
  def single_page_elements(page_index)
    anchors = fetch_document(@url.single_page_url(page_index)).css("h2[class=title]").css("a")
    # Anchors without an href have nothing to collect; skip them instead of
    # failing on nil.
    anchors.map { |anchor| anchor["href"] }.compact.map { |href| href[1..-1] }
  end

  # Number of result pages needed to list +element_number+ results.
  #
  # @return [Integer]
  def page_number
    (element_number.to_f / page_size).ceil
  end

  # @return [Integer] results per page
  def page_size
    PAGE_SIZE
  end

  # Link targets from every result page, in page order.
  #
  # @return [Array<String>]
  def all_elements
    (1..page_number).flat_map { |page_index| single_page_elements(page_index) }
  end

  private

  # Downloads +url+ and parses the response as HTML.
  #
  # Uses URI#open (provided by open-uri) instead of Kernel#open: Kernel#open
  # no longer accepts URLs since Ruby 3.0 and would run a shell command for a
  # string beginning with "|". The block form closes the response stream once
  # it has been parsed.
  def fetch_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end
end
35
+
data/src/search_url.rb ADDED
@@ -0,0 +1,20 @@
1
require 'uri' # URI.encode_www_form_component, used by SearchURL#criteria

# Builds GitHub search URLs from a result type and a set of search qualifiers.
class SearchURL
  # Percent-encoded ":" placed between a qualifier name and its value.
  QUALIFIER_SEPARATOR = "%3A"

  # @param type [#to_s] kind of result to search for; it is capitalized and
  #   sent as the +type+ query parameter
  # @param param [#to_a] qualifier pairs, e.g. a Hash of name => value
  def initialize(type, param)
    @type = type
    @param = param
  end

  # @return [String] URL of the search
  def search_url
    "https://github.com/search?&q=#{criteria}&type=#{@type.to_s.capitalize}"
  end

  # @param page_index [Integer] value sent as the +start_value+ parameter
  # @return [String] the search URL with +start_value+ appended
  def single_page_url(page_index)
    "#{search_url}&start_value=#{page_index}"
  end

  # Query string fragment: each pair joined with an encoded ":", pairs joined
  # with "+". Every part is form-encoded so that characters such as spaces or
  # "+" inside a name or value cannot corrupt the URL.
  #
  # @return [String]
  def criteria
    @param.to_a.map do |pair|
      # flatten keeps the behaviour of Array#join for nested values.
      pair.flatten.map { |part| URI.encode_www_form_component(part.to_s) }.join(QUALIFIER_SEPARATOR)
    end.join('+')
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gitscraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - kiwi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-31 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email: kiwi.swhite.coder@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README
21
+ - src/scraper.rb
22
+ - src/search_url.rb
23
+ homepage: https://github.com/kiwiwin/GitScraper
24
+ licenses: []
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - src
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 1.8.24
44
+ signing_key:
45
+ specification_version: 3
46
+ summary: scrape github stuff
47
+ test_files: []