iudex-simhash 1.0.0-java → 1.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,6 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-html ~> 1.1.0
3
+ * Update to minitest ~> 2.3
4
+
1
5
  === 1.0.0 (2011-04-04)
2
6
  * Initial release.
data/Manifest.txt CHANGED
@@ -13,4 +13,4 @@ test/setup.rb
13
13
  test/test_fuzzy_set.rb
14
14
  test/test_simhash_generator.rb
15
15
  test/html/gentest.html
16
- lib/iudex-simhash/iudex-simhash-1.0.0.jar
16
+ lib/iudex-simhash/iudex-simhash-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-simhash/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-simhash',
@@ -13,10 +13,10 @@ t = RJack::TarPit.new( 'iudex-simhash',
13
13
 
14
14
  t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
- h.extra_deps += [ [ 'iudex-html', '~> 1.0.0' ] ]
16
+ h.extra_deps += [ [ 'iudex-html', '~> 1.1.0' ] ]
17
17
 
18
18
  h.testlib = :minitest
19
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
19
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
20
20
  [ 'rjack-logback', '~> 1.0' ] ]
21
21
  end
22
22
 
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Iudex
18
18
  module SimHash
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
  end
21
21
  end
@@ -28,9 +28,10 @@ module Iudex
28
28
  'config', 'stopwords.en' )
29
29
 
30
30
  def simhash_stopwords( wfile = DEFAULT_WORDS )
31
- words = File.open( wfile ) { |fin| fin.readlines }
32
- words.map! { |w| w.strip }
33
- words.reject! { |w| w =~ /^#/ }
31
+ words =
32
+ File.open( wfile ) { |fin| fin.readlines }.
33
+ map { |w| w.strip }.
34
+ reject { |w| w =~ /^#/ }
34
35
 
35
36
  Gen::StopWordSet.new( words )
36
37
  end
@@ -40,7 +41,9 @@ module Iudex
40
41
  def simhash_generator( input = :simhash_generator_inputs,
41
42
  stopwords = simhash_stopwords )
42
43
 
43
- inputs = send( input ).map { |r| r.to_a }.map do | key, ratio |
44
+ inputs = send( input ).
45
+ map { |r| Array( r ) }.
46
+ map do | key, ratio |
44
47
  key = key.to_k
45
48
  i = if( key.value_type == Element.java_class )
46
49
  SimHashGenerator::Input.forTree( key )
data/pom.xml CHANGED
@@ -3,13 +3,13 @@
3
3
  <groupId>iudex</groupId>
4
4
  <artifactId>iudex-simhash</artifactId>
5
5
  <packaging>jar</packaging>
6
- <version>1.0.0</version>
6
+ <version>1.1.0</version>
7
7
  <name>Iudex simhash production and searching</name>
8
8
 
9
9
  <parent>
10
10
  <groupId>iudex</groupId>
11
11
  <artifactId>iudex-parent</artifactId>
12
- <version>1.0</version>
12
+ <version>1.1</version>
13
13
  <relativePath>..</relativePath>
14
14
  </parent>
15
15
 
@@ -18,7 +18,7 @@
18
18
  <dependency>
19
19
  <groupId>iudex</groupId>
20
20
  <artifactId>iudex-html</artifactId>
21
- <version>[1.0,1.1)</version>
21
+ <version>[1.1,1.2)</version>
22
22
  </dependency>
23
23
 
24
24
  <dependency>
@@ -51,26 +51,26 @@ class TestFuzzySet < MiniTest::Unit::TestCase
51
51
 
52
52
  def test_match
53
53
  m = FuzzyList64.new( 100, 4 )
54
- assert( m.fuzzy_match( 0, 0 ) )
55
- assert( m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
56
- hex( '7FFF_FFFF_7777_FFFF' ) ) )
54
+ assert( m.fuzzy_match( 0, 0 ) )
55
+ assert( m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
56
+ hex( '7FFF_FFFF_7777_FFFF' ) ) )
57
57
 
58
- assert( m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
59
- hex( 'FFFF_FFFF_7777_FFFF' ) ) )
58
+ assert( m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
59
+ hex( 'FFFF_FFFF_7777_FFFF' ) ) )
60
60
 
61
- assert( ! m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
62
- hex( '7FFF_FFFF_EFFF_7777' ) ) )
61
+ refute( m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
62
+ hex( '7FFF_FFFF_EFFF_7777' ) ) )
63
63
 
64
- assert( ! m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
65
- hex( 'FFFF_FFFF_EFFF_7777' ) ) )
64
+ refute( m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
65
+ hex( 'FFFF_FFFF_EFFF_7777' ) ) )
66
66
  end
67
67
 
68
68
  def test_add
69
69
  m = FuzzyList64.new( 100, 4 )
70
- assert( m.addIfNotFound( 0x0 ) )
71
- assert( m.addIfNotFound( 0xFF ) )
72
- assert( ! m.addIfNotFound( 0xFE ) )
73
- assert( ! m.addIfNotFound( 0x1 ) )
70
+ assert( m.addIfNotFound( 0x0 ) )
71
+ assert( m.addIfNotFound( 0xFF ) )
72
+ refute( m.addIfNotFound( 0xFE ) )
73
+ refute( m.addIfNotFound( 0x1 ) )
74
74
  end
75
75
 
76
76
  def test_series_list
@@ -92,7 +92,7 @@ class TestFuzzySet < MiniTest::Unit::TestCase
92
92
  s = s.dup
93
93
  last = s.pop # Remove last for now
94
94
  assert_series_all( fset, s )
95
- assert( ! fset.addIfNotFound( hex( last ) ), last )
95
+ refute( fset.addIfNotFound( hex( last ) ), last )
96
96
  end
97
97
 
98
98
  def assert_series_all( fset, s )
@@ -119,21 +119,21 @@ class TestFuzzySet < MiniTest::Unit::TestCase
119
119
  last = s.pop # Remove last for now
120
120
  assert_find_series_all( fset, s )
121
121
  l = Java::java.util.ArrayList.new;
122
- assert( ! fset.addFindAll( hex( last ), l ) )
122
+ refute( fset.addFindAll( hex( last ), l ) )
123
123
  assert( l.size(), 1 );
124
124
 
125
125
  # Remove the match and try again.
126
126
  assert( fset.remove( l.get( 0 ) ), "remove match" )
127
127
  assert( fset.remove( hex( last ) ), "remove last" )
128
128
  l.clear
129
- assert( ! fset.addFindAll( hex( last ), l ) )
129
+ refute( fset.addFindAll( hex( last ), l ) )
130
130
  assert( l.empty? )
131
131
  end
132
132
 
133
133
  def assert_find_series_all( fset, s )
134
134
  s.each do |k|
135
135
  l = Java::java.util.ArrayList.new;
136
- assert( ! fset.addFindAll( hex( k ), l ) )
136
+ refute( fset.addFindAll( hex( k ), l ) )
137
137
  assert( l.empty? )
138
138
  end
139
139
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-simhash
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-html
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -30,12 +29,9 @@ dependencies:
30
29
  requirement: &id002 !ruby/object:Gem::Requirement
31
30
  none: false
32
31
  requirements:
33
- - - ">="
34
- - !ruby/object:Gem::Version
35
- version: 1.7.1
36
- - - <
32
+ - - ~>
37
33
  - !ruby/object:Gem::Version
38
- version: "2.1"
34
+ version: "2.3"
39
35
  type: :development
40
36
  version_requirements: *id002
41
37
  - !ruby/object:Gem::Dependency
@@ -57,7 +53,7 @@ dependencies:
57
53
  requirements:
58
54
  - - ~>
59
55
  - !ruby/object:Gem::Version
60
- version: 1.3.0
56
+ version: 1.4.0
61
57
  type: :development
62
58
  version_requirements: *id004
63
59
  description: |-
@@ -90,8 +86,8 @@ files:
90
86
  - test/test_fuzzy_set.rb
91
87
  - test/test_simhash_generator.rb
92
88
  - test/html/gentest.html
93
- - lib/iudex-simhash/iudex-simhash-1.0.0.jar
94
- has_rdoc: true
89
+ - lib/iudex-simhash/iudex-simhash-1.1.0.jar
90
+ - .gemtest
95
91
  homepage: http://github.com/dekellum/iudex
96
92
  licenses: []
97
93
 
@@ -116,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
112
  requirements: []
117
113
 
118
114
  rubyforge_project: iudex-simhash
119
- rubygems_version: 1.5.1
115
+ rubygems_version: 1.8.9
120
116
  signing_key:
121
117
  specification_version: 3
122
118
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java