80legs 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/eighty_legs/eighty_format.rb +16 -4
- data/test/test_data/wikipedia.80.gz +0 -0
- data/test/test_eighty_format.rb +13 -2
- metadata +5 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/lib/eighty_legs/eighty_format.rb
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
module EightyLegs
|
2
2
|
class EightyFormat
|
3
3
|
def initialize(filename_or_io)
|
4
|
-
|
5
|
-
|
4
|
+
if filename_or_io.is_a?(String)
|
5
|
+
initialize_with_filename(filename_or_io)
|
6
6
|
elsif filename_or_io.is_a?(IO)
|
7
|
-
filename_or_io
|
7
|
+
initialize_with_io(filename_or_io)
|
8
8
|
else
|
9
9
|
raise TypeError.new(filename_or_io.class)
|
10
10
|
end
|
11
11
|
check_for_classid_and_version()
|
12
12
|
end
|
13
13
|
|
14
|
+
def initialize_with_filename(filename)
|
15
|
+
initialize_with_io(File.open(filename))
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize_with_io(io)
|
19
|
+
@io = Zlib::GzipReader.new(io)
|
20
|
+
rescue Zlib::GzipFile::Error
|
21
|
+
@io = io
|
22
|
+
@io.rewind
|
23
|
+
end
|
24
|
+
|
14
25
|
def each(&blk)
|
15
26
|
while not @io.eof?
|
16
27
|
url_size = @io.read(4).unpack("i").first
|
@@ -19,7 +30,8 @@ module EightyLegs
|
|
19
30
|
data = @io.read(data_size)
|
20
31
|
blk.call(Entry.new(url, data))
|
21
32
|
end
|
22
|
-
@io.
|
33
|
+
@io.rewind
|
34
|
+
@io.read(8)
|
23
35
|
end
|
24
36
|
|
25
37
|
private
|
Binary file
|
data/test/test_eighty_format.rb
CHANGED
@@ -2,10 +2,16 @@ require 'test/helper'
|
|
2
2
|
|
3
3
|
class TestEightyFormat < Test::Unit::TestCase
|
4
4
|
TEST_80 = 'test/test_data/wikipedia.80'
|
5
|
+
TEST_80_GZ = 'test/test_data/wikipedia.80.gz'
|
5
6
|
TEST_80_HTML = 'test/test_data/wikipedia-index.html'
|
6
7
|
|
7
8
|
def test_parse_correct
|
8
|
-
[
|
9
|
+
[
|
10
|
+
File.open(TEST_80),
|
11
|
+
TEST_80,
|
12
|
+
File.open(TEST_80_GZ),
|
13
|
+
TEST_80_GZ
|
14
|
+
].each do |file_or_io|
|
9
15
|
howmany = 0
|
10
16
|
file = EightyFormat.new(file_or_io).each do |entry|
|
11
17
|
assert_equal("http://en.wikipedia.org/wiki/Main_Page", entry.url)
|
@@ -16,7 +22,12 @@ class TestEightyFormat < Test::Unit::TestCase
|
|
16
22
|
assert_equal(1, howmany)
|
17
23
|
end
|
18
24
|
|
19
|
-
[
|
25
|
+
[
|
26
|
+
File.open(TEST_80),
|
27
|
+
TEST_80,
|
28
|
+
File.open(TEST_80_GZ),
|
29
|
+
TEST_80_GZ
|
30
|
+
].each do |file_or_io|
|
20
31
|
howmany = 0
|
21
32
|
EightyFormat.new(file_or_io).each do |entry|
|
22
33
|
howmany += 1
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: 80legs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jan Szumiec
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07-
|
18
|
+
date: 2011-07-11 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- test/helper.rb
|
118
118
|
- test/test_data/wikipedia-index.html
|
119
119
|
- test/test_data/wikipedia.80
|
120
|
+
- test/test_data/wikipedia.80.gz
|
120
121
|
- test/test_eighty_format.rb
|
121
122
|
has_rdoc: true
|
122
123
|
homepage: http://github.com/jasiek/80legs
|