four-chan 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/four-chan.rb +62 -0
- metadata +45 -0
data/lib/four-chan.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require "string-enumerable"
|
2
|
+
require "net/http"
|
3
|
+
require "nokogiri"
|
4
|
+
require "open-uri"
|
5
|
+
#A method should probably be made specifically to pull/process data.
|
6
|
+
#Main class
|
7
|
+
#Pages variable holds data concerning which pages are in this object
|
8
|
+
#Data should hold the actual data
|
9
|
+
#Everything else should be methods that extract stuff from data
|
10
|
+
# Holds parsed 4chan board pages and offers helpers to extract data from them.
#
# @pages remembers which pages belong to this object, as an array of
# [board, page_numbers] pairs (e.g. [[:b, [0, 1]], [:sci, [2]]]).
# @data maps [board, page_number] keys to the parsed document for that page.
class Fourchan
  # Root URL that board names and page numbers are appended to.
  BASE_URL = "http://boards.4chan.org/"

  attr_accessor :data

  # Pulls data from 4chan to create a new Fourchan object.
  #
  # pages - an array whose elements are two-element arrays: a symbol naming
  #         the board without slashes (e.g. :b or :sci) and an enumerable of
  #         the page numbers to pull from that board.
  def initialize(pages)
    # Copy the outer structure so later addPage calls never mutate the
    # caller's array; to_a also lets ranges be given for the page numbers.
    @pages = pages.map { |board, numbers| [board, numbers.to_a] }
    @data = fetch_all(@pages)
  end

  # Resets all page data in the current object, pulling it from the web anew.
  # Pages added with #addPage are re-fetched as well.
  #
  # Returns self.
  def reset
    # Build the replacement hash first so a failed download does not leave
    # the object with half of its pages wiped.
    @data = fetch_all(@pages)
    self
  end

  # Pulls new pages from the web, adding the data to the present object.
  # Note: when #reset is called, pages added with this method are also reset.
  #
  # page - a two-element array in the same format as one element of the
  #        pages argument to #initialize: [board_symbol, page_numbers].
  #
  # Returns the page numbers that were fetched.
  def addPage(page)
    board, numbers = page
    numbers = numbers.to_a

    known = @pages.find { |known_board, _| known_board == board }
    if known
      known[1] = (known[1] + numbers).uniq
    else
      # Append the pair as ONE element; += would splice the board symbol and
      # the number list into @pages as two separate entries.
      @pages << [board, numbers]
    end

    numbers.each { |number| @data[[board, number]] = fetch_page(board, number) }
  end

  # Returns an array, each element being the text from one post on the given
  # page. A post is any <blockquote> whose class attribute contains "post".
  #
  # Raises ArgumentError if the requested page has not been loaded.
  def postsText(board, page)
    doc = @data.fetch([board, page]) do
      raise ArgumentError, "page #{page} of /#{board}/ has not been loaded"
    end

    doc.xpath("//blockquote")
       .select { |node| node["class"].to_s.include?("post") } # to_s: class may be absent
       .map(&:text)
  end

  private

  # Downloads every page listed in pages and returns a fresh
  # { [board, number] => document } hash.
  def fetch_all(pages)
    pages.each_with_object({}) do |(board, numbers), docs|
      numbers.each { |number| docs[[board, number]] = fetch_page(board, number) }
    end
  end

  # Downloads and parses a single board page.
  def fetch_page(board, number)
    # Interpolation handles Integer page numbers; URI.open (from open-uri) is
    # used because bare Kernel#open no longer opens URLs on Ruby 3+. The block
    # form closes the underlying handle once the page has been parsed.
    URI.open("#{BASE_URL}#{board}/#{number}") { |io| Nokogiri::HTML(io) }
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: four-chan
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Eigil Rischel
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-22 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: A very simplistic interface to pull data from the popular website fourchan
|
15
|
+
email: ayegill@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/four-chan.rb
|
21
|
+
homepage: http://rubygems.org/gems/four-chan
|
22
|
+
licenses: []
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 1.8.24
|
42
|
+
signing_key:
|
43
|
+
specification_version: 3
|
44
|
+
summary: fourchan interface
|
45
|
+
test_files: []
|