four-chan 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/four-chan.rb +62 -0
  2. metadata +45 -0
@@ -0,0 +1,62 @@
1
+ require "string-enumerable"
2
+ require "net/http"
3
+ require "nokogiri"
4
+ require "open-uri"
5
+ #A method should probably be made specifically to pull/process data.
6
+ #Main class
7
+ #Pages variable holds data concerning which pages are in this object
8
+ #Data should hold the actual data
9
+ #Everything else should be methods that extract stuff from data
10
# Pulls 4chan board pages from the web and exposes helpers to extract
# information from the parsed documents.
#
# Page specifications are two-element arrays: a board symbol without slashes
# (e.g. :b or :sci) and an enumerable of page numbers, e.g. [:sci, [1, 2]].
class Fourchan
  # Root URL every board page address is built from.
  BASE_URL = "http://boards.4chan.org/"

  # Hash mapping [board, page_number] keys to the parsed document of that page.
  attr_accessor :data

  # Pulls data from 4chan to create a new Fourchan object.
  #
  # @param pages [Array<Array>] list of [board, page_numbers] specifications
  def initialize(pages)
    # Work on a private copy so later addPage calls never mutate the caller's arrays.
    @pages = pages.map { |entry| normalize_entry(entry) }
    @data = fetch_all(@pages)
  end

  # Resets all page data in the current object, pulling it from the web anew.
  # Pages added through #addPage are fetched again as well.
  #
  # @return [Fourchan] self
  def reset
    # Build the fresh hash completely before swapping it in, so a failed
    # download leaves the previously loaded data untouched.
    @data = fetch_all(@pages)
    self
  end

  # Pulls new pages from the web, adding the data to the present object.
  # Note: when #reset is called, pages added with this method are also reset.
  #
  # @param page [Array] a [board, page_numbers] specification, same format as
  #   one element of the pages argument of #initialize
  # @return [Array] the page numbers that were fetched
  def addPage(page)
    board, numbers = normalize_entry(page)
    known = @pages.find { |entry| entry.first == board }
    if known
      known[1] = (known[1] + numbers).uniq
    else
      # Append the specification as ONE entry; `+=` would flatten it.
      @pages << [board, numbers]
    end
    numbers.each { |number| @data[[board, number]] = fetch_document(board, number) }
  end
  alias add_page addPage

  # Returns an array, each element being the text from one post on the given page.
  #
  # @param board [Symbol] board the page belongs to
  # @param page [Object] page number, as given when the page was loaded
  # @return [Array<String>] text of every post found on the page
  # @raise [ArgumentError] if the page has not been loaded into this object
  def postsText(board, page)
    doc = @data[[board, page]]
    raise ArgumentError, "page #{page.inspect} of board #{board.inspect} is not loaded" unless doc

    # Only blockquotes whose class mentions "post" are post bodies; a
    # blockquote may carry no class attribute at all, hence the to_s.
    posts = doc.xpath("//blockquote").select { |node| node["class"].to_s.include?("post") }
    posts.map(&:text)
  end
  alias posts_text postsText

  private

  # Splits a [board, page_numbers] specification and returns it with the
  # page numbers as a fresh Array (so ranges work and callers' arrays stay intact).
  def normalize_entry(entry)
    board, numbers = entry
    [board, numbers.to_a.dup]
  end

  # Builds the URL of one board page.
  def page_url(board, page)
    "#{BASE_URL}#{board}/#{page}"
  end

  # Downloads and parses every page of every specification, returning a
  # hash keyed by [board, page_number].
  def fetch_all(pages)
    pages.each_with_object({}) do |(board, numbers), docs|
      numbers.each { |number| docs[[board, number]] = fetch_document(board, number) }
    end
  end

  # Downloads one board page and parses it with Nokogiri.
  def fetch_document(board, page)
    # URI.open is the supported open-uri entry point on current Rubies;
    # older Rubies only offer the Kernel#open override, so fall back to it.
    opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
    # Block form closes the HTTP stream as soon as parsing is done.
    opener.call(page_url(board, page)) { |io| Nokogiri::HTML(io) }
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: four-chan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Eigil Rischel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-22 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A very simplistic interface to pull data from the popular website fourchan
15
+ email: ayegill@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/four-chan.rb
21
+ homepage: http://rubygems.org/gems/four-chan
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.24
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: fourchan interface
45
+ test_files: []