wlsearchscraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/wlsearchscraper.rb +25 -0
  2. metadata +48 -0
@@ -0,0 +1,25 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
# Queries the WikiLeaks advanced search for a set of search terms and
# collects the IDs of the documents listed on the results page.
class WLSearchScraper
  # Search endpoint up to (and including) the `q=` parameter.
  SEARCH_URL_PREFIX = 'https://search.wikileaks.org/advanced?q='.freeze

  # Fixed filter parameters appended to every search request.
  SEARCH_URL_SUFFIX =
    '&exclude_words=&words_title_only=&words_content_only=&publication_type[]=3'.freeze

  # Suffix that follows the document ID in a result link's file name.
  ID_SUFFIX = '_a.html'.freeze

  # searchterms - String of search terms separated by spaces (or by "+").
  def initialize(searchterms)
    @searchterms = searchterms
    @resultlist = []
  end

  # Returns array of document IDs matching search terms.
  #
  # The list is rebuilt on every call, so calling #scrape repeatedly does not
  # accumulate duplicates. The string passed to #initialize is never modified.
  # Result headings that contain no link are skipped.
  def scrape
    page = fetch_results_page(search_url)

    @resultlist = page.css('h4').each_with_object([]) do |heading, ids|
      link = heading.css('a').first
      next if link.nil?

      id = document_id(link['href'])
      ids << id unless id.nil?
    end
  end

  private

  # Builds the full search URL for the stored search terms.
  def search_url
    # Terms are split on spaces and on "+" so that callers who already joined
    # their terms with "+" keep working; each term is then percent-encoded so
    # characters such as "&" or "#" cannot alter the query string.
    # The -1 limit keeps empty trailing fields, mirroring a plain
    # space-to-"+" substitution.
    encoded_terms = @searchterms.split(/[ +]/, -1).map do |term|
      URI.encode_www_form_component(term)
    end

    SEARCH_URL_PREFIX + encoded_terms.join('+') + SEARCH_URL_SUFFIX
  end

  # Downloads the page at url and returns it parsed by Nokogiri.
  def fetch_results_page(url)
    # The block form lets open-uri close the response handle once the
    # document has been parsed.
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Extracts the document ID from a result link: the last path segment of
  # href, up to the "_a.html" suffix. Returns nil when there is no usable
  # segment.
  def document_id(href)
    return nil if href.nil?

    filename = href.split('/').last
    return nil if filename.nil?

    filename.split(ID_SUFFIX).first
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wlsearchscraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - M. C. McGrath
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-03-27 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Gets a list of documents from the WikiLeaks search that match certain
15
+ terms.
16
+ email: shidash@shidash.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/wlsearchscraper.rb
22
+ homepage: https://github.com/Shidash/wlsearchscraper
23
+ licenses:
24
+ - GPL
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 1.8.23
44
+ signing_key:
45
+ specification_version: 3
46
+ summary: Gets list of documents from WikiLeaks search.
47
+ test_files: []
48
+ has_rdoc: