iudex-brutefuzzy-service 1.2.b.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +2 -0
- data/Manifest.txt +17 -0
- data/README.rdoc +33 -0
- data/Rakefile +7 -0
- data/bin/iudex-brutefuzzy-client +162 -0
- data/bin/iudex-brutefuzzy-service-fg +50 -0
- data/config/config.rb +23 -0
- data/init/iudex-brutefuzzy-service +48 -0
- data/lib/iudex-brutefuzzy-service.rb +37 -0
- data/lib/iudex-brutefuzzy-service/agent.rb +71 -0
- data/lib/iudex-brutefuzzy-service/base.rb +23 -0
- data/lib/iudex-brutefuzzy-service/destinations.rb +71 -0
- data/lib/iudex-brutefuzzy-service/iudex-brutefuzzy-service-1.2.b.0.jar +0 -0
- data/pom.xml +63 -0
- data/test/setup.rb +44 -0
- data/test/test_agent.rb +64 -0
- data/test/test_qpid_context.rb +43 -0
- metadata +164 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
History.rdoc
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
pom.xml
|
6
|
+
bin/iudex-brutefuzzy-client
|
7
|
+
bin/iudex-brutefuzzy-service-fg
|
8
|
+
config/config.rb
|
9
|
+
init/iudex-brutefuzzy-service
|
10
|
+
lib/iudex-brutefuzzy-service/base.rb
|
11
|
+
lib/iudex-brutefuzzy-service.rb
|
12
|
+
lib/iudex-brutefuzzy-service/agent.rb
|
13
|
+
lib/iudex-brutefuzzy-service/destinations.rb
|
14
|
+
test/setup.rb
|
15
|
+
test/test_agent.rb
|
16
|
+
test/test_qpid_context.rb
|
17
|
+
lib/iudex-brutefuzzy-service/iudex-brutefuzzy-service-1.2.b.0.jar
|
data/README.rdoc
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= iudex-brutefuzzy-service
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-brutefuzzy-service provides a fuzzy simhash
|
9
|
+
lookup index as a distributed service.
|
10
|
+
|
11
|
+
== Dependencies
|
12
|
+
|
13
|
+
* Java 1.5+
|
14
|
+
|
15
|
+
For tests:
|
16
|
+
|
17
|
+
* JRuby 1.3+
|
18
|
+
|
19
|
+
== License
|
20
|
+
|
21
|
+
Copyright (c) 2008-2012 David Kellum
|
22
|
+
|
23
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
24
|
+
may not use this file except in compliance with the License. You
|
25
|
+
may obtain a copy of the License at:
|
26
|
+
|
27
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
28
|
+
|
29
|
+
Unless required by applicable law or agreed to in writing, software
|
30
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
31
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
32
|
+
implied. See the License for the specific language governing
|
33
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
$LOAD_PATH.unshift File.join( File.dirname(__FILE__), "..", "lib" )
|
21
|
+
|
22
|
+
module IudexBruteFuzzyClient
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'rjack-logback'
|
26
|
+
|
27
|
+
include RJack
|
28
|
+
Logback.config_console( :thread => true )
|
29
|
+
|
30
|
+
require 'iudex-brutefuzzy-service'
|
31
|
+
require 'iudex-brutefuzzy-service/destinations'
|
32
|
+
|
33
|
+
include Iudex
|
34
|
+
include Iudex::BruteFuzzy::Service
|
35
|
+
include Iudex::SimHash::BruteFuzzy
|
36
|
+
|
37
|
+
Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
|
38
|
+
|
39
|
+
# Parse the command line switches out of ARGV. OptionParser#parse! is
# destructive, so recognized switches are removed and any positional
# KeyFile arguments are left in ARGV for the caller.
#
# Returns a Hash that may contain :sample_random_keys and/or
# :add_random_keys, each mapped to the Integer N given on the command
# line. The -d/--debug switch only adjusts the 'iudex' logger level.
def self.parse_options
  options = {}

  parser = OptionParser.new do |opts|
    opts.banner = <<BANNER
Usage: iudex-brutefuzzy-client [options] | [KeyFile]...
BANNER
    opts.on( "--sample-random-keys N", Integer,
             "Sample N randomly selected test keys to stdout" ) do |n|
      options[ :sample_random_keys ] = n
    end
    opts.on( "--add-random-keys N", Integer,
             "Add N randomly selected test keys to the service" ) do |n|
      options[ :add_random_keys ] = n
    end
    opts.on( "-v", "--version", "Display version and exit" ) do
      puts "iudex-brutefuzzy-client: #{ Iudex::BruteFuzzy::Service::VERSION }"
      # Displaying the version is a successful outcome: exit with
      # status 0 so calling shells/scripts do not see a failure.
      exit 0
    end
    opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do
      Logback[ 'iudex' ].level = Logback::DEBUG
    end
    # Let Hooker add its own switches to this parser
    # (presumably config file loading -- confirm against Hooker docs).
    Hooker.register_config( opts )
  end
  parser.parse!

  options
end
|
66
|
+
|
67
|
+
import 'iudex.brutefuzzy.service.Client'
|
68
|
+
import 'rjack.jms.JMSConnector'
|
69
|
+
|
70
|
+
BruteFuzzyUtil = Java::iudex.simhash.brutefuzzy.BruteFuzzy
|
71
|
+
|
72
|
+
# Yield batches of generated test keys until a total of n keys have
# been produced. Each batch holds at most 1,000,000 keys and is
# obtained from BruteFuzzyUtil.testKeys with a fresh random seed.
def self.random_keys( n )
  remaining = n
  until remaining <= 0
    batch_size = ( remaining < 1_000_000 ) ? remaining : 1_000_000
    seed = rand( 999_999_999 )
    yield BruteFuzzyUtil.testKeys( batch_size, 3, seed )
    remaining -= batch_size
  end
end
|
79
|
+
|
80
|
+
# Read hexadecimal keys, one per line, from each of the named files
# and yield them to the block in batches.
#
# files:: Enumerable of file names to read, in order.
# bsize:: Maximum number of keys per yielded batch (default 1,000).
#
# The same Array instance is reused and cleared after every yield, so
# a block that needs to retain the keys must copy them. Any partial
# batch remaining at the end of a file is yielded and then cleared, so
# those keys are never sent a second time together with the next
# file's keys.
def self.keys_from_files( files, bsize = 1_000 )
  buffer = []

  files.each do |fname|
    # File.open rather than Kernel#open: names come from the command
    # line and Kernel#open would treat a leading "|" as a command.
    File.open( fname, 'r' ) do |fin|
      fin.each_line do |line|
        buffer << BruteFuzzyUtil.unsignedHexToLong( line.rstrip )
        next if buffer.length < bsize
        yield buffer
        buffer.clear
      end
    end

    # Flush what is left of this file's keys.
    unless buffer.empty?
      yield buffer
      buffer.clear
    end
  end
end
|
96
|
+
|
97
|
+
# Script entry point. Depending on the parsed options this either:
# * writes sampled random keys to stdout (--sample-random-keys), or
# * with a connected client: sends random keys (--add-random-keys),
#   sends the keys read from the files named in ARGV, or, when
#   neither is given, acts purely as a response listener until
#   interrupted.
def self.run
  @log = SLF4J[ 'iudex.brutefuzzy.service.Client' ]

  options = parse_options
  @cnt = 0

  sample_count = options[ :sample_random_keys ]
  if sample_count
    return random_keys( sample_count ) { |batch| write_batch( batch ) }
  end

  with_client do
    add_count = options[ :add_random_keys ]
    if add_count
      random_keys( add_count ) { |batch| check_batch( batch ) }
    elsif ARGV.empty?
      # client (response listener) only
      @client.check( 0, false ) # Hack to initialize
      sleep # until interrupted
    else
      keys_from_files( ARGV ) { |batch| check_batch( batch ) }
    end
  end
end
|
118
|
+
|
119
|
+
# Wire up the JMS context, connector and brute-fuzzy Client (each
# passed through the :iudex Hooker scope so configuration may adjust
# or replace it), start the connector, and then yield to the block.
# On exit, normal or not, the client is closed and the connector is
# stopped if they were created.
def self.with_client
  Hooker.with( :iudex ) do |hooks|
    context = QpidClient::QpidJMSContext.new
    Destinations.apply( context )
    hooks.apply( :jms_context, context )

    @connector = hooks.apply( :jms_connector, JMSConnector.new( context ) )
    @connector.do_close_connections = false
    @connector.start

    @client = hooks.apply( :brutefuzzy_client, Client.new( @connector ) )
  end

  yield
ensure
  if ( client = @client )
    client.close
    @client = nil
  end
  if @connector
    @connector.stop
  end
end
|
142
|
+
|
143
|
+
# Send every key in keys to the service via @client.check( key, true ),
# counting each in @cnt and logging progress every 50,000 keys sent.
def self.check_batch( keys )
  keys.each do |key|
    @client.check( key, true )
    @cnt += 1
    next unless ( @cnt % 50_000 ) == 0
    @log.info( "Sent: #{ @cnt }" )
  end
end
|
150
|
+
|
151
|
+
# Print each key to stdout, one per line, as hexadecimal text
# left-padded with '0' to 16 characters, then add the batch size to
# the running @cnt total.
def self.write_batch( keys )
  keys.each do |key|
    hex = Java::java.lang.Long::toHexString( key )
    puts hex.rjust( 16, '0' )
  end
  @cnt += keys.length
end
|
159
|
+
|
160
|
+
run
|
161
|
+
|
162
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.vm.options += -Xmx1g
|
3
|
+
# -*- ruby -*-
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2008-2012 David Kellum
|
7
|
+
#
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
9
|
+
# may not use this file except in compliance with the License. You
|
10
|
+
# may obtain a copy of the License at
|
11
|
+
#
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13
|
+
#
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
17
|
+
# implied. See the License for the specific language governing
|
18
|
+
# permissions and limitations under the License.
|
19
|
+
#++
|
20
|
+
|
21
|
+
$LOAD_PATH.unshift File.join( File.dirname(__FILE__), "..", "lib" )
|
22
|
+
|
23
|
+
# Foreground launcher for the brute-fuzzy service: configures console
# logging, parses the command line and then runs the service Agent.
# Wrapped in a module so the includes below do not leak into Object.
module IudexBinScript

  require 'rubygems'
  require 'rjack-logback'

  include RJack
  Logback.config_console( :thread => true )

  require 'iudex-brutefuzzy-service'

  include Iudex

  # Route Hooker's own log output through the 'iudex' SLF4J logger.
  Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }

  OptionParser.new do |opts|
    opts.on( "-v", "--version", "Display version" ) do
      puts "iudex-brutefuzzy-service: #{ BruteFuzzy::Service::VERSION }"
      # Displaying the version is a successful outcome: exit with
      # status 0 so calling shells/scripts do not see a failure.
      exit 0
    end
    opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do
      Logback[ 'iudex' ].level = Logback::DEBUG
    end
    # Let Hooker add its own switches to this parser.
    Hooker.register_config( opts )
  end.parse!

  BruteFuzzy::Service::Agent.new.run

end
|
data/config/config.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Sample configuration for the iudex-brutefuzzy-service.
Iudex.configure do |c|

  c.setup_brutefuzzy_agent do |a|

    # Override the agent's fuzzy_set factory with a larger tree.
    # The method must be named fuzzy_set: that is what Agent#run calls
    # when constructing the Service, so a singleton under any other
    # name would be silently ignored.
    def a.fuzzy_set
      Iudex::SimHash::BruteFuzzy::FuzzyTree64.new( 8 * 1024 * 1024, 3, 16 )
    end

  end

  c.with( :jms ) do |jc|

    # Adjust the JMS context used by the agent, for example:
    jc.setup_context do |ctx|
      # ctx.brokers = [ [ "host-a" ], [ "host-b" ] ]
    end

    # Adjust the JMS connector used by the agent, for example:
    jc.setup_connector do |cntr|
      # cntr.max_connect_delay = 60_000 #ms
    end

  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
#. hashdot.profile += daemon
|
4
|
+
#. hashdot.pid_file = ./iudex-brutefuzzy-service.pid
|
5
|
+
#. hashdot.io_redirect.file = ./iudex-brutefuzzy-service.log
|
6
|
+
#. hashdot.vm.options += -Xmx2g
|
7
|
+
#. hashdot.vm.options += -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled
|
8
|
+
#. hashdot.vm.options += -XX:+UseCompressedOops
|
9
|
+
|
10
|
+
#--
|
11
|
+
# Copyright (c) 2008-2012 David Kellum
|
12
|
+
#
|
13
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
14
|
+
# may not use this file except in compliance with the License. You
|
15
|
+
# may obtain a copy of the License at
|
16
|
+
#
|
17
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
18
|
+
#
|
19
|
+
# Unless required by applicable law or agreed to in writing, software
|
20
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
21
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
22
|
+
# implied. See the License for the specific language governing
|
23
|
+
# permissions and limitations under the License.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require 'rubygems'
|
27
|
+
|
28
|
+
gem( "iudex-brutefuzzy-service", "= 1.2.b.0" )
|
29
|
+
|
30
|
+
# Daemon (init) launcher for the brute-fuzzy service: sets up full
# console logging, loads ./config.rb when present in the working
# directory, and then runs the service Agent.
module IudexInitScript

  require 'rjack-logback'
  include RJack

  Logback.config_console( :full => true, :thread => true )

  require 'iudex-brutefuzzy-service'
  include Iudex

  # Route Hooker's own log output through the 'iudex' SLF4J logger.
  Hooker.log_with do |msg|
    SLF4J[ 'iudex' ].info( msg.rstrip )
  end

  config_file = './config.rb'
  Hooker.load_file( config_file ) if File.exist?( config_file )

  BruteFuzzy::Service::Agent.new.run

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-core'
|
18
|
+
require 'iudex-simhash'
|
19
|
+
require 'iudex-brutefuzzy-protobuf'
|
20
|
+
|
21
|
+
require 'rjack-qpid-client'
|
22
|
+
|
23
|
+
require 'iudex-brutefuzzy-service/base.rb'
|
24
|
+
|
25
|
+
require 'java'
|
26
|
+
|
27
|
+
# Load the service jar matching this gem's VERSION (defined in
# iudex-brutefuzzy-service/base.rb, required above) and import the
# Java Service class into this namespace.
module Iudex::BruteFuzzy::Service
  require "iudex-brutefuzzy-service/iudex-brutefuzzy-service-#{VERSION}.jar"

  import 'iudex.brutefuzzy.service.Service'
end
|
36
|
+
|
37
|
+
require 'iudex-brutefuzzy-service/agent'
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-brutefuzzy-service'
|
18
|
+
require 'iudex-brutefuzzy-service/destinations'
|
19
|
+
|
20
|
+
require 'hooker'
|
21
|
+
|
22
|
+
module Iudex::BruteFuzzy::Service

  # Assembles and runs the brute-fuzzy service: a JMS context and
  # connector plus a Service wrapping a fuzzy set. The agent itself
  # and each collaborator are passed through Hooker so configuration
  # can adjust them before the connect loop starts.
  class Agent
    include Iudex::Core
    include Iudex::BruteFuzzy
    include Iudex::SimHash::BruteFuzzy

    include RJack::QpidClient

    import 'rjack.jms.JMSConnector'

    # Apply any [ :iudex, :brutefuzzy_agent ] hooks to the new agent.
    def initialize
      Hooker.apply( [ :iudex, :brutefuzzy_agent ], self )
    end

    # Factory for the fuzzy set handed to the Service in #run.
    def fuzzy_set
      FuzzyTree64.new( 500_000, 3, 16 )
    end

    # Return a new QpidJMSContext with the service Destinations
    # applied.
    def jms_context
      context = QpidJMSContext.new
      Destinations.apply( context )
      context
    end

    # Return a new JMSConnector for ctx, with max_connect_delay set
    # to Integer::MAX_VALUE and connection closing disabled.
    def jms_connector( ctx )
      jms = JMSConnector.new( ctx )
      jms.max_connect_delay = java.lang.Integer::MAX_VALUE
      jms.do_close_connections = false
      jms
    end

    # Build the context, connector and service, applying hooks to
    # each, then enter the connector's connect loop. Returns whatever
    # connect_loop returns.
    def run
      context = jms_context
      Hooker.apply( [ :jms, :context ], context )

      jms = jms_connector( context )
      Hooker.apply( [ :jms, :connector ], jms )

      fuzzy_service = Service.new( fuzzy_set )
      Hooker.apply( [ :iudex, :brutefuzzy_service ], fuzzy_service )

      Hooker.log_not_applied # All hooks should be used by now

      jms.add_connect_listener( fuzzy_service )
      jms.connect_loop
    end

  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
  module BruteFuzzy
    module Service
      # Gem version; also used to locate the bundled service jar.
      # Frozen so the shared constant cannot be mutated in place.
      VERSION = '1.2.b.0'.freeze
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex::BruteFuzzy::Service

  # Address definitions for the JMS destinations used by the
  # brute-fuzzy service and its clients.
  module Destinations

    # Register the three destinations on ctx.destinations:
    # 'brutefuzzy-response-ex', 'brutefuzzy-request' and
    # 'brutefuzzy-client'.
    def self.apply( ctx )

      response_exchange = {
        :assert => :sender,
        :create => :sender,
        :node   => {
          :type       => :topic,
          'x-declare' => { :type => :fanout }
        }
      }
      ctx.destinations[ 'brutefuzzy-response-ex' ] = response_exchange

      # Direct request writes are needed for querying depth
      # (explicit flow control). Thus no exchange here.
      request_queue = {
        :assert => :always,
        :create => :always,
        :node   => {
          :type       => :queue,
          'x-declare' => {
            :arguments => {
              'qpid.max_size'    => 500_000,
              'qpid.policy_type' => :reject
            }
          }
        }
      }
      ctx.destinations[ 'brutefuzzy-request' ] = request_queue

      # Receiver-owned queue with an address from
      # ctx.address_per_process, bound to the response exchange.
      client_queue = {
        :address => ctx.address_per_process( 'brutefuzzy-client' ),
        :assert  => :receiver,
        :create  => :receiver,
        :delete  => :receiver,
        :node    => {
          :type        => :queue,
          'x-bindings' => [ { :exchange => 'brutefuzzy-response-ex' } ],
          'x-declare'  => {
            'auto-delete' => true,
            :arguments    => {
              'qpid.max_size'    => 500_000,
              'qpid.policy_type' => :ring
            }
          }
        }
      }
      ctx.destinations[ 'brutefuzzy-client' ] = client_queue

    end
  end

end
|
Binary file
|
data/pom.xml
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
2
|
+
<modelVersion>4.0.0</modelVersion>
|
3
|
+
<groupId>iudex</groupId>
|
4
|
+
<artifactId>iudex-brutefuzzy-service</artifactId>
|
5
|
+
<packaging>jar</packaging>
|
6
|
+
<version>1.2.b.0</version>
|
7
|
+
<name>Iudex BruteFuzzy Service</name>
|
8
|
+
|
9
|
+
<parent>
|
10
|
+
<groupId>iudex</groupId>
|
11
|
+
<artifactId>iudex-parent</artifactId>
|
12
|
+
<version>1.2.b.0</version>
|
13
|
+
<relativePath>..</relativePath>
|
14
|
+
</parent>
|
15
|
+
|
16
|
+
<dependencies>
|
17
|
+
|
18
|
+
<dependency>
|
19
|
+
<groupId>iudex</groupId>
|
20
|
+
<artifactId>iudex-core</artifactId>
|
21
|
+
<version>[1.2,1.2.9999)</version>
|
22
|
+
</dependency>
|
23
|
+
|
24
|
+
<dependency>
|
25
|
+
<groupId>iudex</groupId>
|
26
|
+
<artifactId>iudex-brutefuzzy-protobuf</artifactId>
|
27
|
+
<version>[1.2,1.2.9999)</version>
|
28
|
+
</dependency>
|
29
|
+
|
30
|
+
<dependency>
|
31
|
+
<groupId>iudex</groupId>
|
32
|
+
<artifactId>iudex-simhash</artifactId>
|
33
|
+
<version>[1.2,1.2.9999)</version>
|
34
|
+
</dependency>
|
35
|
+
|
36
|
+
<dependency>
|
37
|
+
<groupId>rjack</groupId>
|
38
|
+
<artifactId>rjack-jms</artifactId>
|
39
|
+
<version>[1.1,1.2)</version>
|
40
|
+
</dependency>
|
41
|
+
|
42
|
+
<dependency>
|
43
|
+
<groupId>org.apache.qpid</groupId>
|
44
|
+
<artifactId>qpid-client</artifactId>
|
45
|
+
<version>0.14</version>
|
46
|
+
</dependency>
|
47
|
+
|
48
|
+
</dependencies>
|
49
|
+
|
50
|
+
<build>
|
51
|
+
<plugins>
|
52
|
+
<plugin>
|
53
|
+
<!-- Parent settings -->
|
54
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
55
|
+
</plugin>
|
56
|
+
<plugin>
|
57
|
+
<!-- Parent settings -->
|
58
|
+
<artifactId>maven-source-plugin</artifactId>
|
59
|
+
</plugin>
|
60
|
+
</plugins>
|
61
|
+
</build>
|
62
|
+
|
63
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup, logging, console output ####
|
18
|
+
|
19
|
+
require 'rubygems'
|
20
|
+
require 'bundler/setup'
|
21
|
+
|
22
|
+
require 'minitest/unit'
|
23
|
+
require 'minitest/autorun'
|
24
|
+
|
25
|
+
require 'rjack-logback'
|
26
|
+
|
27
|
+
# Shared test setup: console logging to stderr and, unless one of
# -v/--verbose/--debug was given, line-oriented MiniTest output.
module TestSetup
  include RJack
  Logback.config_console( :stderr => true, :thread => true )

  verbose = !( ARGV & %w[ -v --verbose --debug ] ).empty?

  if verbose
    Logback.root.level = Logback::DEBUG
  else
    # Make test output logging compatible: no partial lines.
    class TestOut
      def puts( *a )
        $stdout.puts( *a )
      end

      # print deliberately behaves like puts, terminating each write.
      def print( *a )
        $stdout.puts( *a )
      end
    end
    MiniTest::Unit.output = TestOut.new
  end

  # --debug is handled here only; remove it from the arguments.
  ARGV.delete( '--debug' )

end
|
data/test/test_agent.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You may
|
9
|
+
# obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-brutefuzzy-service'
|
23
|
+
|
24
|
+
# Exercises Agent#run with the JMS connect loop stubbed out, both with
# default settings and with the sample config/config.rb loaded.
class TestAgent < MiniTest::Unit::TestCase
  include Iudex::BruteFuzzy::Service

  # Reset Hooker state so configuration does not leak between tests.
  def teardown
    Hooker.send( :clear )
  end

  def test_agent_default
    assert_agent
  end

  def test_agent_with_sample_config

    # Test out the sample config
    Hooker.load_file( File.join( File.dirname( __FILE__ ),
                                 '..', 'config', 'config.rb' ) )
    assert_agent
  end

  # Run a new Agent whose connector's connect_loop is replaced by a
  # stub returning :test_run_return, then verify that run returned
  # that value and that every registered hook was applied.
  def assert_agent

    agent = Agent.new

    def agent.jms_connector( ctx )
      c = super
      def c.connect_loop
        :test_run_return
      end
      c
    end

    # Expected value first, actual second, so that a failure message
    # labels the two values correctly.
    assert_equal( :test_run_return, agent.run )

    Hooker.check_not_applied do |*args|
      flunk( "Hooks not applied: " + args.inspect )
    end
    pass

  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You may
|
9
|
+
# obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-brutefuzzy-service'
|
23
|
+
require 'iudex-brutefuzzy-service/destinations'
|
24
|
+
|
25
|
+
# Checks that a QpidJMSContext with the service Destinations applied
# can create a connection and look up each named destination.
class TestQpidContext < MiniTest::Unit::TestCase
  include RJack::QpidClient
  include Iudex::BruteFuzzy::Service

  def test_destinations
    connection = nil
    ctx = QpidJMSContext.new
    Destinations.apply( ctx )

    connection = ctx.create_connection
    assert( connection )

    names = [ 'brutefuzzy-request',
              'brutefuzzy-response-ex',
              'brutefuzzy-client' ]
    names.each do |name|
      assert( ctx.lookup_destination( name ) )
    end
  ensure
    ctx.close if ctx
    connection.close if connection
  end

end
|
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iudex-brutefuzzy-service
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: 4
|
5
|
+
version: 1.2.b.0
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- David Kellum
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2012-03-05 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: iudex-core
|
17
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.2.b
|
23
|
+
requirement: *id001
|
24
|
+
prerelease: false
|
25
|
+
type: :runtime
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rjack-logback
|
28
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "1.0"
|
34
|
+
requirement: *id002
|
35
|
+
prerelease: false
|
36
|
+
type: :runtime
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: iudex-simhash
|
39
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 1.2.b
|
45
|
+
requirement: *id003
|
46
|
+
prerelease: false
|
47
|
+
type: :runtime
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: iudex-brutefuzzy-protobuf
|
50
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.2.b
|
56
|
+
requirement: *id004
|
57
|
+
prerelease: false
|
58
|
+
type: :runtime
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: rjack-jms
|
61
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ~>
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 1.1.0
|
67
|
+
requirement: *id005
|
68
|
+
prerelease: false
|
69
|
+
type: :runtime
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rjack-qpid-client
|
72
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.14.0
|
78
|
+
requirement: *id006
|
79
|
+
prerelease: false
|
80
|
+
type: :runtime
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: minitest
|
83
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ~>
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: "2.3"
|
89
|
+
requirement: *id007
|
90
|
+
prerelease: false
|
91
|
+
type: :development
|
92
|
+
- !ruby/object:Gem::Dependency
|
93
|
+
name: rjack-tarpit
|
94
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ~>
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "2.0"
|
100
|
+
requirement: *id008
|
101
|
+
prerelease: false
|
102
|
+
type: :development
|
103
|
+
description: Iudex is a general purpose web crawler and feed processor in ruby/java. The iudex-brutefuzzy-service provides a fuzzy simhash lookup index as a distributed service.
|
104
|
+
email:
|
105
|
+
- dek-oss@gravitext.com
|
106
|
+
executables:
|
107
|
+
- iudex-brutefuzzy-client
|
108
|
+
- iudex-brutefuzzy-service-fg
|
109
|
+
extensions: []
|
110
|
+
|
111
|
+
extra_rdoc_files:
|
112
|
+
- History.rdoc
|
113
|
+
- README.rdoc
|
114
|
+
files:
|
115
|
+
- History.rdoc
|
116
|
+
- Manifest.txt
|
117
|
+
- README.rdoc
|
118
|
+
- Rakefile
|
119
|
+
- pom.xml
|
120
|
+
- bin/iudex-brutefuzzy-client
|
121
|
+
- bin/iudex-brutefuzzy-service-fg
|
122
|
+
- config/config.rb
|
123
|
+
- init/iudex-brutefuzzy-service
|
124
|
+
- lib/iudex-brutefuzzy-service/base.rb
|
125
|
+
- lib/iudex-brutefuzzy-service.rb
|
126
|
+
- lib/iudex-brutefuzzy-service/agent.rb
|
127
|
+
- lib/iudex-brutefuzzy-service/destinations.rb
|
128
|
+
- test/setup.rb
|
129
|
+
- test/test_agent.rb
|
130
|
+
- test/test_qpid_context.rb
|
131
|
+
- lib/iudex-brutefuzzy-service/iudex-brutefuzzy-service-1.2.b.0.jar
|
132
|
+
homepage: http://github.com/dekellum/iudex
|
133
|
+
licenses: []
|
134
|
+
|
135
|
+
post_install_message:
|
136
|
+
rdoc_options:
|
137
|
+
- --main
|
138
|
+
- README.rdoc
|
139
|
+
require_paths:
|
140
|
+
- lib
|
141
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
142
|
+
none: false
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
hash: 2
|
147
|
+
segments:
|
148
|
+
- 0
|
149
|
+
version: "0"
|
150
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
|
+
none: false
|
152
|
+
requirements:
|
153
|
+
- - ">"
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: 1.3.1
|
156
|
+
requirements: []
|
157
|
+
|
158
|
+
rubyforge_project:
|
159
|
+
rubygems_version: 1.8.15
|
160
|
+
signing_key:
|
161
|
+
specification_version: 3
|
162
|
+
summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
|
163
|
+
test_files: []
|
164
|
+
|