four-chan 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/four-chan.rb +62 -0
  2. metadata +45 -0
@@ -0,0 +1,62 @@
1
+ require "string-enumerable"
2
+ require "net/http"
3
+ require "nokogiri"
4
+ require "open-uri"
5
+ #A method should probably be made specifically to pull/process data.
6
+ #Main class
7
+ #Pages variable holds data concerning which pages are in this object
8
+ #Data should hold the actual data
9
+ #Everything else should be methods that extract stuff from data
10
# Downloads 4chan board pages and keeps the parsed documents in memory.
#
# @pages remembers which pages belong to this object, as a list of
# [board, page_numbers] pairs (for example [:sci, [0, 1]]).
# @data maps each [board, page_number] key to the document parsed from that
# page. Everything else extracts information from @data.
class Fourchan
  # Root URL that board names and page numbers are appended to.
  BASE_URL = "http://boards.4chan.org/"

  # Hash of [board, page_number] => parsed document.
  attr_accessor :data

  # Pulls data from 4chan to create a new Fourchan object.
  #
  # pages must be an array whose elements are two-element arrays: a symbol
  # naming the board without slashes (e.g. :b or :sci) and an enumerable of
  # the page numbers to pull from that board.
  #
  # The list is copied, so later calls to #addPage never modify the caller's
  # arrays.
  def initialize(pages)
    @pages = pages.map { |board, numbers| [board, numbers.to_a.dup] }
    @data = fetch_pages(@pages)
  end

  # Resets all page data in the current object, pulling it from the web anew.
  # Pages added with #addPage are refreshed as well.
  #
  # The old data is only replaced once every page has been fetched, so a
  # failed download leaves the previous data in place. Returns self.
  def reset
    @data = fetch_pages(@pages)
    self
  end

  # Pulls new pages from the web and adds their data to this object.
  #
  # page has the same format as one element of the pages argument of
  # #initialize: a two-element array holding the board symbol and an
  # enumerable of page numbers. Pages that were already loaded are fetched
  # again and overwritten. Returns self.
  def addPage(page)
    board, numbers = page
    numbers = numbers.to_a

    # Fetch before touching any state so that a failed download does not
    # leave @pages listing pages that @data does not contain.
    fetched = fetch_pages([[board, numbers]])

    entry = @pages.find { |known_board, _| known_board == board }
    if entry
      entry[1] |= numbers
    else
      @pages << [board, numbers.uniq]
    end

    @data.update(fetched)
    self
  end

  # Returns an array of strings, each being the text of one post on the given
  # page. A post is a <blockquote> element whose class attribute contains
  # "post"; blockquotes without a class attribute are skipped.
  #
  # Raises ArgumentError if the page has not been loaded into this object.
  def postsText(board, page)
    doc = @data.fetch([board, page]) do
      raise ArgumentError,
            "page #{page.inspect} of board #{board.inspect} has not been loaded"
    end

    posts = doc.xpath("//blockquote").select do |node|
      # to_s guards against a missing class attribute (nil).
      node["class"].to_s.include?("post")
    end
    posts.map { |node| node.text }
  end

  private

  # Downloads every page named in pages (a list of [board, page_numbers]
  # pairs) and returns a hash of [board, page_number] => parsed document.
  def fetch_pages(pages)
    pages.each_with_object({}) do |(board, numbers), docs|
      numbers.each do |number|
        docs[[board, number]] = fetch_page(board, number)
      end
    end
  end

  # Builds the URL of one board page. Interpolation is used so that both
  # Integer and String page numbers work.
  def page_url(board, number)
    "#{BASE_URL}#{board}/#{number}"
  end

  # Downloads one board page and parses it as HTML.
  # The block form closes the connection once parsing is done.
  def fetch_page(board, number)
    URI.parse(page_url(board, number)).open { |io| Nokogiri::HTML(io) }
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: four-chan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Eigil Rischel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-22 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A very simplistic interface to pull data from the popular website fourchan
15
+ email: ayegill@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/four-chan.rb
21
+ homepage: http://rubygems.org/gems/four-chan
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.24
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: fourchan interface
45
+ test_files: []