80legs 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/eighty_legs/eighty_format.rb +16 -4
- data/test/test_data/wikipedia.80.gz +0 -0
- data/test/test_eighty_format.rb +13 -2
- metadata +5 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/lib/eighty_legs/eighty_format.rb
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
module EightyLegs
|
2
2
|
class EightyFormat
|
3
3
|
def initialize(filename_or_io)
|
4
|
-
|
5
|
-
|
4
|
+
if filename_or_io.is_a?(String)
|
5
|
+
initialize_with_filename(filename_or_io)
|
6
6
|
elsif filename_or_io.is_a?(IO)
|
7
|
-
filename_or_io
|
7
|
+
initialize_with_io(filename_or_io)
|
8
8
|
else
|
9
9
|
raise TypeError.new(filename_or_io.class)
|
10
10
|
end
|
11
11
|
check_for_classid_and_version()
|
12
12
|
end
|
13
13
|
|
14
|
+
def initialize_with_filename(filename)
|
15
|
+
initialize_with_io(File.open(filename))
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize_with_io(io)
|
19
|
+
@io = Zlib::GzipReader.new(io)
|
20
|
+
rescue Zlib::GzipFile::Error
|
21
|
+
@io = io
|
22
|
+
@io.rewind
|
23
|
+
end
|
24
|
+
|
14
25
|
def each(&blk)
|
15
26
|
while not @io.eof?
|
16
27
|
url_size = @io.read(4).unpack("i").first
|
@@ -19,7 +30,8 @@ module EightyLegs
|
|
19
30
|
data = @io.read(data_size)
|
20
31
|
blk.call(Entry.new(url, data))
|
21
32
|
end
|
22
|
-
@io.
|
33
|
+
@io.rewind
|
34
|
+
@io.read(8)
|
23
35
|
end
|
24
36
|
|
25
37
|
private
|
Binary file
|
data/test/test_eighty_format.rb
CHANGED
@@ -2,10 +2,16 @@ require 'test/helper'
|
|
2
2
|
|
3
3
|
class TestEightyFormat < Test::Unit::TestCase
|
4
4
|
TEST_80 = 'test/test_data/wikipedia.80'
|
5
|
+
TEST_80_GZ = 'test/test_data/wikipedia.80.gz'
|
5
6
|
TEST_80_HTML = 'test/test_data/wikipedia-index.html'
|
6
7
|
|
7
8
|
def test_parse_correct
|
8
|
-
[
|
9
|
+
[
|
10
|
+
File.open(TEST_80),
|
11
|
+
TEST_80,
|
12
|
+
File.open(TEST_80_GZ),
|
13
|
+
TEST_80_GZ
|
14
|
+
].each do |file_or_io|
|
9
15
|
howmany = 0
|
10
16
|
file = EightyFormat.new(file_or_io).each do |entry|
|
11
17
|
assert_equal("http://en.wikipedia.org/wiki/Main_Page", entry.url)
|
@@ -16,7 +22,12 @@ class TestEightyFormat < Test::Unit::TestCase
|
|
16
22
|
assert_equal(1, howmany)
|
17
23
|
end
|
18
24
|
|
19
|
-
[
|
25
|
+
[
|
26
|
+
File.open(TEST_80),
|
27
|
+
TEST_80,
|
28
|
+
File.open(TEST_80_GZ),
|
29
|
+
TEST_80_GZ
|
30
|
+
].each do |file_or_io|
|
20
31
|
howmany = 0
|
21
32
|
EightyFormat.new(file_or_io).each do |entry|
|
22
33
|
howmany += 1
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: 80legs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jan Szumiec
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07-
|
18
|
+
date: 2011-07-11 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- test/helper.rb
|
118
118
|
- test/test_data/wikipedia-index.html
|
119
119
|
- test/test_data/wikipedia.80
|
120
|
+
- test/test_data/wikipedia.80.gz
|
120
121
|
- test/test_eighty_format.rb
|
121
122
|
has_rdoc: true
|
122
123
|
homepage: http://github.com/jasiek/80legs
|