x2ch 0.9.2 → 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/x2ch.rb +226 -226
  2. metadata +4 -4
data/lib/x2ch.rb CHANGED
@@ -5,230 +5,230 @@ require 'kconv'
5
5
  require 'zlib'
6
6
 
7
7
  module X2CH
8
- class Bbs
9
- attr_accessor :categories
10
-
11
- def initialize()
12
- @categories = []
13
- end
14
-
15
- def [](cname)
16
- @categories.each{|c|
17
- return c if c.name == cname
18
- }
19
- nil
20
- end
21
-
22
- def push(category)
23
- @categories << category
24
- end
25
-
26
- def each(&blk)
27
- @categories.each{|c|
28
- yield c
29
- }
30
- end
31
-
32
- def self.load()
33
- BbsMenu.parse(BbsMenu.download)
34
- end
35
- end
36
-
37
- class Category
38
- attr_accessor :name, :boards
39
-
40
- def initialize(name)
41
- @name = name
42
- @boards = []
43
- end
44
-
45
- def [](bname)
46
- @boards.each{|b|
47
- return b if b.name == bname
48
- }
49
- nil
50
- end
51
-
52
- def push(board)
53
- @boards << board
54
- end
55
-
56
- def each(&blk)
57
- @boards.each{|b|
58
- yield b
59
- }
60
- end
61
- end
62
-
63
- class Board
64
- attr_accessor :url, :name
65
-
66
- def initialize(url, name)
67
- @url, @name = url, name
68
- end
69
-
70
- def threads()
71
- Subject.parse(@url, Subject.download(@url + '/subject.txt'))
72
- end
73
-
74
- def each(&blk)
75
- threads.each{|t|
76
- yield t
77
- }
78
- end
79
- end
80
-
81
- class Thread
82
- attr_accessor :url, :dat, :name, :num
83
-
84
- def initialize(url, dat, name, num)
85
- @url, @dat, @name, @num = url, dat, name, num
86
- end
87
-
88
- def posts(if_modified_since = nil, range = nil)
89
- if @url.match(/machi.to/)
90
- part = @url.match(/^(http:\/\/.+?)\/(.+?)\//).to_a
91
- res = Dat.download(part[1] + "/bbs/offlaw.cgi/" + part[2] + '/' + @dat.sub('.cgi', ''), if_modified_since, range)
92
- else
93
- res = Dat.download(@url + "dat/" + @dat, if_modified_since, range)
94
- end
95
- ArrayResponse.new(Dat.parse(res), res.status, res.last_modified, res.content_encoding, res.body_size)
96
- end
97
-
98
- def each(&blk)
99
- posts.each{|p|
100
- yield p
101
- }
102
- end
103
- end
104
-
105
- class Post
106
- attr_accessor :name, :mail, :metadata, :body
107
-
108
- def initialize(name, mail, metadata, body)
109
- @name, @mail, @metadata, @body = name, mail, metadata, body
110
- end
111
- end
112
-
113
- class Agent
114
- def self.download(url, if_modified_since = nil, range = nil)
115
- header = {"User-Agent" => "Monazilla/1.00 (x2ch/0.9.1)", "Accept-Encoding" => 'gzip'}
116
- if if_modified_since
117
- header["If-Modified-Since"] = if_modified_since
118
- end
119
- if range
120
- header["Range"] = range
121
- end
122
- begin
123
- res = open(url, header){|f|
124
- body = nil
125
- if f.content_encoding.index('gzip')
126
- body = Zlib::GzipReader.new(f).read.toutf8
127
- else
128
- body = f.read.toutf8
129
- end
130
- [body, f.status, f.last_modified, f.content_encoding, body.size]
131
- }
132
- rescue OpenURI::HTTPError => e
133
- raise DownloadError.new(e.message)
134
- end
135
- StringResponse.new(res[0], res[1], res[2], res[3], res[4])
136
- end
137
- end
138
-
139
- class DownloadError < StandardError ; end
140
-
141
- module Response
142
- attr_accessor :status, :last_modified, :content_encoding, :body_size
143
-
144
- def initialize(obj, status, last_modified, content_encoding, body_size)
145
- @status, @last_modified, @content_encoding, @body_size = status, last_modified, content_encoding, body_size
146
- super(obj)
147
- end
148
- end
149
-
150
- class StringResponse < String
151
- include Response
152
- end
153
-
154
- class ArrayResponse < Array
155
- include Response
156
- end
157
-
158
- class BbsMenu
159
- IGNORE_CATEGORIES = ['特別企画', 'チャット', 'ツール類']
160
- IGNORE_BOARDS = ['2chプロジェクト', 'いろいろランク']
161
-
162
- def self.download
163
- Agent.download("http://menu.2ch.net/bbsmenu.html")
164
- end
165
-
166
- def self.parse(html)
167
- bbs = Bbs.new
168
- category = nil
169
- html.each_line{|l|
170
- cname = l.match(/<BR><BR><B>(.+?)<\/B><BR>/).to_a[1]
171
- if cname
172
- if IGNORE_CATEGORIES.include?(cname)
173
- category = nil
174
- else
175
- category = Category.new(cname)
176
- bbs.push(category)
177
- end
178
-
179
- next
180
- end
181
-
182
- next unless category
183
-
184
- b = l.match(/<A HREF=(http:\/\/.*(?:\.2ch\.net|\.bbspink\.com|\.machi\.to)[^\s]*).*>(.+)<\/A>/).to_a
185
- if b[0]
186
- next if IGNORE_BOARDS.include?(b[2])
187
-
188
- board = Board.new(b[1], b[2])
189
- category.push(board)
190
- end
191
- }
192
- bbs
193
- end
194
- end
195
-
196
- class Subject
197
- def self.download(url)
198
- Agent.download(url)
199
- end
200
-
201
- def self.parse(url, subject)
202
- threads = []
203
- subject.each_line{|l|
204
- m = l.match(/^(\d+\.(?:dat|cgi))(?:<>|,)(.+)\((\d+)\)$/).to_a
205
- if m[0]
206
- threads << Thread.new(url, m[1], m[2], m[3].to_i)
207
- end
208
- }
209
- threads
210
- end
211
- end
212
-
213
- class Dat
214
- def self.download(url, if_modified_since = nil, range = nil)
215
- Agent.download(url, if_modified_since, range)
216
- end
217
-
218
- def self.parse(dat)
219
- posts = []
220
- dat.each_line{|l|
221
- m = l.match(/^(\d+)<>(.+?)<>(.*?)<>(.*?)<>(.+)<>.*$/).to_a
222
- if m[0]
223
- posts << Post.new(m[2], m[3], m[4], m[5])
224
- else
225
- m = l.match(/^(.+?)<>(.*?)<>(.*?)<>(.+)<>.*$/).to_a
226
- if m[0]
227
- posts << Post.new(m[1], m[2], m[3], m[4])
228
- end
229
- end
230
- }
231
- posts
232
- end
233
- end
8
+ class Bbs
9
+ attr_accessor :categories
10
+
11
+ def initialize()
12
+ @categories = []
13
+ end
14
+
15
+ def [](cname)
16
+ @categories.each{|c|
17
+ return c if c.name == cname
18
+ }
19
+ nil
20
+ end
21
+
22
+ def push(category)
23
+ @categories << category
24
+ end
25
+
26
+ def each(&blk)
27
+ @categories.each{|c|
28
+ yield c
29
+ }
30
+ end
31
+
32
+ def self.load()
33
+ BbsMenu.parse(BbsMenu.download)
34
+ end
35
+ end
36
+
37
+ class Category
38
+ attr_accessor :name, :boards
39
+
40
+ def initialize(name)
41
+ @name = name
42
+ @boards = []
43
+ end
44
+
45
+ def [](bname)
46
+ @boards.each{|b|
47
+ return b if b.name == bname
48
+ }
49
+ nil
50
+ end
51
+
52
+ def push(board)
53
+ @boards << board
54
+ end
55
+
56
+ def each(&blk)
57
+ @boards.each{|b|
58
+ yield b
59
+ }
60
+ end
61
+ end
62
+
63
+ class Board
64
+ attr_accessor :url, :name
65
+
66
+ def initialize(url, name)
67
+ @url, @name = url, name
68
+ end
69
+
70
+ def threads()
71
+ Subject.parse(@url, Subject.download(@url + '/subject.txt'))
72
+ end
73
+
74
+ def each(&blk)
75
+ threads.each{|t|
76
+ yield t
77
+ }
78
+ end
79
+ end
80
+
81
+ class Thread
82
+ attr_accessor :url, :dat, :name, :num
83
+
84
+ def initialize(url, dat, name, num)
85
+ @url, @dat, @name, @num = url, dat, name, num
86
+ end
87
+
88
+ def posts(if_modified_since = nil, range = nil)
89
+ if @url.match(/machi.to/)
90
+ part = @url.match(/^(http:\/\/.+?)\/(.+?)\//).to_a
91
+ res = Dat.download(part[1] + "/bbs/offlaw.cgi/" + part[2] + '/' + @dat.sub('.cgi', ''), if_modified_since, range)
92
+ else
93
+ res = Dat.download(@url + "dat/" + @dat, if_modified_since, range)
94
+ end
95
+ ArrayResponse.new(Dat.parse(res), res.status, res.last_modified, res.content_encoding, res.body_size)
96
+ end
97
+
98
+ def each(&blk)
99
+ posts.each{|p|
100
+ yield p
101
+ }
102
+ end
103
+ end
104
+
105
+ class Post
106
+ attr_accessor :name, :mail, :metadata, :body
107
+
108
+ def initialize(name, mail, metadata, body)
109
+ @name, @mail, @metadata, @body = name, mail, metadata, body
110
+ end
111
+ end
112
+
113
+ class Agent
114
+ def self.download(url, if_modified_since = nil, range = nil)
115
+ header = {"User-Agent" => "Monazilla/1.00 (x2ch/0.9.1)", "Accept-Encoding" => 'gzip'}
116
+ if if_modified_since
117
+ header["If-Modified-Since"] = if_modified_since
118
+ end
119
+ if range
120
+ header["Range"] = range
121
+ end
122
+ begin
123
+ res = open(url, header){|f|
124
+ body = nil
125
+ if f.content_encoding.index('gzip')
126
+ body = Zlib::GzipReader.new(f).read.toutf8
127
+ else
128
+ body = f.read.toutf8
129
+ end
130
+ [body, f.status, f.last_modified, f.content_encoding, body.size]
131
+ }
132
+ rescue OpenURI::HTTPError => e
133
+ raise DownloadError.new(e.message)
134
+ end
135
+ StringResponse.new(res[0], res[1], res[2], res[3], res[4])
136
+ end
137
+ end
138
+
139
+ class DownloadError < StandardError ; end
140
+
141
+ module Response
142
+ attr_accessor :status, :last_modified, :content_encoding, :body_size
143
+
144
+ def initialize(obj, status, last_modified, content_encoding, body_size)
145
+ @status, @last_modified, @content_encoding, @body_size = status, last_modified, content_encoding, body_size
146
+ super(obj)
147
+ end
148
+ end
149
+
150
+ class StringResponse < String
151
+ include Response
152
+ end
153
+
154
+ class ArrayResponse < Array
155
+ include Response
156
+ end
157
+
158
+ class BbsMenu
159
+ IGNORE_CATEGORIES = ['特別企画', 'チャット', 'ツール類']
160
+ IGNORE_BOARDS = ['2chプロジェクト', 'いろいろランク']
161
+
162
+ def self.download
163
+ Agent.download("http://menu.2ch.sc/bbsmenu.html")
164
+ end
165
+
166
+ def self.parse(html)
167
+ bbs = Bbs.new
168
+ category = nil
169
+ html.each_line{|l|
170
+ cname = l.match(/<BR><BR><B>(.+?)<\/B><BR>/).to_a[1]
171
+ if cname
172
+ if IGNORE_CATEGORIES.include?(cname)
173
+ category = nil
174
+ else
175
+ category = Category.new(cname)
176
+ bbs.push(category)
177
+ end
178
+
179
+ next
180
+ end
181
+
182
+ next unless category
183
+
184
+ b = l.match(/<A HREF=(http:\/\/.*(?:\.2ch\.sc|\.bbspink\.com|\.machi\.to)[^\s]*).*>(.+)<\/A>/).to_a
185
+ if b[0]
186
+ next if IGNORE_BOARDS.include?(b[2])
187
+
188
+ board = Board.new(b[1], b[2])
189
+ category.push(board)
190
+ end
191
+ }
192
+ bbs
193
+ end
194
+ end
195
+
196
+ class Subject
197
+ def self.download(url)
198
+ Agent.download(url)
199
+ end
200
+
201
+ def self.parse(url, subject)
202
+ threads = []
203
+ subject.each_line{|l|
204
+ m = l.match(/^(\d+\.(?:dat|cgi))(?:<>|,)(.+)\((\d+)\)$/).to_a
205
+ if m[0]
206
+ threads << Thread.new(url, m[1], m[2], m[3].to_i)
207
+ end
208
+ }
209
+ threads
210
+ end
211
+ end
212
+
213
+ class Dat
214
+ def self.download(url, if_modified_since = nil, range = nil)
215
+ Agent.download(url, if_modified_since, range)
216
+ end
217
+
218
+ def self.parse(dat)
219
+ posts = []
220
+ dat.each_line{|l|
221
+ m = l.match(/^(\d+)<>(.+?)<>(.*?)<>(.*?)<>(.+)<>.*$/).to_a
222
+ if m[0]
223
+ posts << Post.new(m[2], m[3], m[4], m[5])
224
+ else
225
+ m = l.match(/^(.+?)<>(.*?)<>(.*?)<>(.+)<>.*$/).to_a
226
+ if m[0]
227
+ posts << Post.new(m[1], m[2], m[3], m[4])
228
+ end
229
+ end
230
+ }
231
+ posts
232
+ end
233
+ end
234
234
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: x2ch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,9 +9,9 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-10-27 00:00:00.000000000 Z
12
+ date: 2015-03-26 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: 2ch downloader and parser library
14
+ description: 2ch.sc downloader and parser library
15
15
  email: mail@xmisao.com
16
16
  executables: []
17
17
  extensions: []
@@ -42,5 +42,5 @@ rubyforge_project:
42
42
  rubygems_version: 1.8.23
43
43
  signing_key:
44
44
  specification_version: 3
45
- summary: 2ch downloader and parser library
45
+ summary: 2ch.sc downloader and parser library
46
46
  test_files: []