iudex-simhash 1.0.0-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,6 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-html ~> 1.1.0
3
+ * Update to minitest ~> 2.3
4
+
1
5
  === 1.0.0 (2011-04-04)
2
6
  * Initial release.
data/Manifest.txt CHANGED
@@ -13,4 +13,4 @@ test/setup.rb
13
13
  test/test_fuzzy_set.rb
14
14
  test/test_simhash_generator.rb
15
15
  test/html/gentest.html
16
- lib/iudex-simhash/iudex-simhash-1.0.0.jar
16
+ lib/iudex-simhash/iudex-simhash-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-simhash/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-simhash',
@@ -13,10 +13,10 @@ t = RJack::TarPit.new( 'iudex-simhash',
13
13
 
14
14
  t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
- h.extra_deps += [ [ 'iudex-html', '~> 1.0.0' ] ]
16
+ h.extra_deps += [ [ 'iudex-html', '~> 1.1.0' ] ]
17
17
 
18
18
  h.testlib = :minitest
19
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
19
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
20
20
  [ 'rjack-logback', '~> 1.0' ] ]
21
21
  end
22
22
 
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Iudex
18
18
  module SimHash
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
  end
21
21
  end
@@ -28,9 +28,10 @@ module Iudex
28
28
  'config', 'stopwords.en' )
29
29
 
30
30
  def simhash_stopwords( wfile = DEFAULT_WORDS )
31
- words = File.open( wfile ) { |fin| fin.readlines }
32
- words.map! { |w| w.strip }
33
- words.reject! { |w| w =~ /^#/ }
31
+ words =
32
+ File.open( wfile ) { |fin| fin.readlines }.
33
+ map { |w| w.strip }.
34
+ reject { |w| w =~ /^#/ }
34
35
 
35
36
  Gen::StopWordSet.new( words )
36
37
  end
@@ -40,7 +41,9 @@ module Iudex
40
41
  def simhash_generator( input = :simhash_generator_inputs,
41
42
  stopwords = simhash_stopwords )
42
43
 
43
- inputs = send( input ).map { |r| r.to_a }.map do | key, ratio |
44
+ inputs = send( input ).
45
+ map { |r| Array( r ) }.
46
+ map do | key, ratio |
44
47
  key = key.to_k
45
48
  i = if( key.value_type == Element.java_class )
46
49
  SimHashGenerator::Input.forTree( key )
data/pom.xml CHANGED
@@ -3,13 +3,13 @@
3
3
  <groupId>iudex</groupId>
4
4
  <artifactId>iudex-simhash</artifactId>
5
5
  <packaging>jar</packaging>
6
- <version>1.0.0</version>
6
+ <version>1.1.0</version>
7
7
  <name>Iudex simhash production and searching</name>
8
8
 
9
9
  <parent>
10
10
  <groupId>iudex</groupId>
11
11
  <artifactId>iudex-parent</artifactId>
12
- <version>1.0</version>
12
+ <version>1.1</version>
13
13
  <relativePath>..</relativePath>
14
14
  </parent>
15
15
 
@@ -18,7 +18,7 @@
18
18
  <dependency>
19
19
  <groupId>iudex</groupId>
20
20
  <artifactId>iudex-html</artifactId>
21
- <version>[1.0,1.1)</version>
21
+ <version>[1.1,1.2)</version>
22
22
  </dependency>
23
23
 
24
24
  <dependency>
@@ -51,26 +51,26 @@ class TestFuzzySet < MiniTest::Unit::TestCase
51
51
 
52
52
  def test_match
53
53
  m = FuzzyList64.new( 100, 4 )
54
- assert( m.fuzzy_match( 0, 0 ) )
55
- assert( m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
56
- hex( '7FFF_FFFF_7777_FFFF' ) ) )
54
+ assert( m.fuzzy_match( 0, 0 ) )
55
+ assert( m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
56
+ hex( '7FFF_FFFF_7777_FFFF' ) ) )
57
57
 
58
- assert( m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
59
- hex( 'FFFF_FFFF_7777_FFFF' ) ) )
58
+ assert( m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
59
+ hex( 'FFFF_FFFF_7777_FFFF' ) ) )
60
60
 
61
- assert( ! m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
62
- hex( '7FFF_FFFF_EFFF_7777' ) ) )
61
+ refute( m.fuzzy_match( hex( '7FFF_FFFF_FFFF_FFFF' ),
62
+ hex( '7FFF_FFFF_EFFF_7777' ) ) )
63
63
 
64
- assert( ! m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
65
- hex( 'FFFF_FFFF_EFFF_7777' ) ) )
64
+ refute( m.fuzzy_match( hex( 'FFFF_FFFF_FFFF_FFFF' ),
65
+ hex( 'FFFF_FFFF_EFFF_7777' ) ) )
66
66
  end
67
67
 
68
68
  def test_add
69
69
  m = FuzzyList64.new( 100, 4 )
70
- assert( m.addIfNotFound( 0x0 ) )
71
- assert( m.addIfNotFound( 0xFF ) )
72
- assert( ! m.addIfNotFound( 0xFE ) )
73
- assert( ! m.addIfNotFound( 0x1 ) )
70
+ assert( m.addIfNotFound( 0x0 ) )
71
+ assert( m.addIfNotFound( 0xFF ) )
72
+ refute( m.addIfNotFound( 0xFE ) )
73
+ refute( m.addIfNotFound( 0x1 ) )
74
74
  end
75
75
 
76
76
  def test_series_list
@@ -92,7 +92,7 @@ class TestFuzzySet < MiniTest::Unit::TestCase
92
92
  s = s.dup
93
93
  last = s.pop # Remove last for now
94
94
  assert_series_all( fset, s )
95
- assert( ! fset.addIfNotFound( hex( last ) ), last )
95
+ refute( fset.addIfNotFound( hex( last ) ), last )
96
96
  end
97
97
 
98
98
  def assert_series_all( fset, s )
@@ -119,21 +119,21 @@ class TestFuzzySet < MiniTest::Unit::TestCase
119
119
  last = s.pop # Remove last for now
120
120
  assert_find_series_all( fset, s )
121
121
  l = Java::java.util.ArrayList.new;
122
- assert( ! fset.addFindAll( hex( last ), l ) )
122
+ refute( fset.addFindAll( hex( last ), l ) )
123
123
  assert( l.size(), 1 );
124
124
 
125
125
  # Remove the match and try again.
126
126
  assert( fset.remove( l.get( 0 ) ), "remove match" )
127
127
  assert( fset.remove( hex( last ) ), "remove last" )
128
128
  l.clear
129
- assert( ! fset.addFindAll( hex( last ), l ) )
129
+ refute( fset.addFindAll( hex( last ), l ) )
130
130
  assert( l.empty? )
131
131
  end
132
132
 
133
133
  def assert_find_series_all( fset, s )
134
134
  s.each do |k|
135
135
  l = Java::java.util.ArrayList.new;
136
- assert( ! fset.addFindAll( hex( k ), l ) )
136
+ refute( fset.addFindAll( hex( k ), l ) )
137
137
  assert( l.empty? )
138
138
  end
139
139
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-simhash
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-html
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -30,12 +29,9 @@ dependencies:
30
29
  requirement: &id002 !ruby/object:Gem::Requirement
31
30
  none: false
32
31
  requirements:
33
- - - ">="
34
- - !ruby/object:Gem::Version
35
- version: 1.7.1
36
- - - <
32
+ - - ~>
37
33
  - !ruby/object:Gem::Version
38
- version: "2.1"
34
+ version: "2.3"
39
35
  type: :development
40
36
  version_requirements: *id002
41
37
  - !ruby/object:Gem::Dependency
@@ -57,7 +53,7 @@ dependencies:
57
53
  requirements:
58
54
  - - ~>
59
55
  - !ruby/object:Gem::Version
60
- version: 1.3.0
56
+ version: 1.4.0
61
57
  type: :development
62
58
  version_requirements: *id004
63
59
  description: |-
@@ -90,8 +86,8 @@ files:
90
86
  - test/test_fuzzy_set.rb
91
87
  - test/test_simhash_generator.rb
92
88
  - test/html/gentest.html
93
- - lib/iudex-simhash/iudex-simhash-1.0.0.jar
94
- has_rdoc: true
89
+ - lib/iudex-simhash/iudex-simhash-1.1.0.jar
90
+ - .gemtest
95
91
  homepage: http://github.com/dekellum/iudex
96
92
  licenses: []
97
93
 
@@ -116,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
112
  requirements: []
117
113
 
118
114
  rubyforge_project: iudex-simhash
119
- rubygems_version: 1.5.1
115
+ rubygems_version: 1.8.9
120
116
  signing_key:
121
117
  specification_version: 3
122
118
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java