Wiki_Category_Extractor 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/Wiki_Category_Extractor.rb +40 -0
  2. metadata +45 -0
@@ -0,0 +1,40 @@
1
require 'open-uri'
require 'set'

# Walks an English Wikipedia category and its subcategories breadth-first,
# collecting the links listed in each category page's "#mw-pages" section.
class Wiki_Category_Extractor
  # Scheme and host prepended to the site-relative hrefs found in the listings.
  WIKI_ROOT = 'http://en.wikipedia.org'.freeze

  # Collects page links starting from the category named +a+.
  #
  # Categories are visited in the order they are discovered. Each category URL
  # is fetched at most once, so cycles in the category graph cannot make the
  # crawl loop forever. The name of every fully processed category is printed
  # to standard output.
  #
  # @param a [String] category name as it appears after "Category:" in the
  #   URL; spaces are converted to underscores
  # @param pagelimit [Integer] the crawl stops as soon as the number of
  #   collected pages exceeds this value, so the result may hold more than
  #   +pagelimit+ entries (the whole listing that crossed the limit is kept)
  # @return [Hash{String => String}] link text => absolute URL
  def self.extract(a, pagelimit)
    pages = {}
    visited = Set.new
    queue = [[a, "#{WIKI_ROOT}/wiki/Category:#{a.to_s.tr(' ', '_')}"]]

    until queue.empty?
      name, url = queue.shift
      # Set#add? returns nil when the URL was already seen.
      next unless visited.add?(url)

      doc = fetch_document(url)

      anchors_in(doc, '#mw-subcategories').each do |title, href|
        queue << [title, WIKI_ROOT + href]
      end

      anchors_in(doc, '#mw-pages').each do |title, href|
        # Anchors without visible text are not recorded as pages.
        pages[title] = WIKI_ROOT + href unless title.empty?
      end

      # Checked before the progress line, so the category that crosses the
      # limit is not printed.
      break if pages.length > pagelimit

      puts name
    end

    pages
  end

  # Downloads +url+ and parses it as HTML. The block form closes the
  # connection once parsing is done.
  def self.fetch_document(url)
    # Third-party gem; loaded on first use so that loading this file only
    # needs the standard library.
    require 'nokogiri'
    # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the
    # supported entry point.
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end
  private_class_method :fetch_document

  # Returns [text, href] pairs for every anchor under +selector+ that carries
  # an href attribute.
  def self.anchors_in(doc, selector)
    doc.css(selector).css('a').each_with_object([]) do |anchor, found|
      href = anchor['href']
      found << [anchor.text, href] if href
    end
  end
  private_class_method :anchors_in
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: Wiki_Category_Extractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Prateek Papriwal
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-12-20 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Category Extractor
15
+ email: papriwalprateek@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/Wiki_Category_Extractor.rb
21
+ homepage: http://rubygems.org/gems/Wiki_Category_Extractor
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.11
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: ! '!!!'
45
+ test_files: []