iudex-brutefuzzy-service 1.2.b.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +2 -0
- data/Manifest.txt +17 -0
- data/README.rdoc +33 -0
- data/Rakefile +7 -0
- data/bin/iudex-brutefuzzy-client +162 -0
- data/bin/iudex-brutefuzzy-service-fg +50 -0
- data/config/config.rb +23 -0
- data/init/iudex-brutefuzzy-service +48 -0
- data/lib/iudex-brutefuzzy-service.rb +37 -0
- data/lib/iudex-brutefuzzy-service/agent.rb +71 -0
- data/lib/iudex-brutefuzzy-service/base.rb +23 -0
- data/lib/iudex-brutefuzzy-service/destinations.rb +71 -0
- data/lib/iudex-brutefuzzy-service/iudex-brutefuzzy-service-1.2.b.0.jar +0 -0
- data/pom.xml +63 -0
- data/test/setup.rb +44 -0
- data/test/test_agent.rb +64 -0
- data/test/test_qpid_context.rb +43 -0
- metadata +164 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
History.rdoc
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
pom.xml
|
6
|
+
bin/iudex-brutefuzzy-client
|
7
|
+
bin/iudex-brutefuzzy-service-fg
|
8
|
+
config/config.rb
|
9
|
+
init/iudex-brutefuzzy-service
|
10
|
+
lib/iudex-brutefuzzy-service/base.rb
|
11
|
+
lib/iudex-brutefuzzy-service.rb
|
12
|
+
lib/iudex-brutefuzzy-service/agent.rb
|
13
|
+
lib/iudex-brutefuzzy-service/destinations.rb
|
14
|
+
test/setup.rb
|
15
|
+
test/test_agent.rb
|
16
|
+
test/test_qpid_context.rb
|
17
|
+
lib/iudex-brutefuzzy-service/iudex-brutefuzzy-service-1.2.b.0.jar
|
data/README.rdoc
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= iudex-brutefuzzy-service
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-brutefuzzy-service provides a fuzzy simhash
|
9
|
+
lookup index as a distributed service.
|
10
|
+
|
11
|
+
== Dependencies
|
12
|
+
|
13
|
+
* Java 1.5+
|
14
|
+
|
15
|
+
For tests:
|
16
|
+
|
17
|
+
* JRuby 1.3+
|
18
|
+
|
19
|
+
== License
|
20
|
+
|
21
|
+
Copyright (c) 2008-2012 David Kellum
|
22
|
+
|
23
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
24
|
+
may not use this file except in compliance with the License. You
|
25
|
+
may obtain a copy of the License at:
|
26
|
+
|
27
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
28
|
+
|
29
|
+
Unless required by applicable law or agreed to in writing, software
|
30
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
31
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
32
|
+
implied. See the License for the specific language governing
|
33
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
$LOAD_PATH.unshift File.join( File.dirname(__FILE__), "..", "lib" )
|
21
|
+
|
22
|
+
module IudexBruteFuzzyClient
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'rjack-logback'
|
26
|
+
|
27
|
+
include RJack
|
28
|
+
Logback.config_console( :thread => true )
|
29
|
+
|
30
|
+
require 'iudex-brutefuzzy-service'
|
31
|
+
require 'iudex-brutefuzzy-service/destinations'
|
32
|
+
|
33
|
+
include Iudex
|
34
|
+
include Iudex::BruteFuzzy::Service
|
35
|
+
include Iudex::SimHash::BruteFuzzy
|
36
|
+
|
37
|
+
Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
|
38
|
+
|
39
|
+
def self.parse_options
|
40
|
+
options = {}
|
41
|
+
|
42
|
+
OptionParser.new do |opts|
|
43
|
+
opts.banner = <<BANNER
|
44
|
+
Usage: iudex-brutefuzzy-client [options] | [KeyFile]...
|
45
|
+
BANNER
|
46
|
+
opts.on( "--sample-random-keys N", Integer,
|
47
|
+
"Sample N randomly selected test keys to stdout" ) do |n|
|
48
|
+
options[ :sample_random_keys ] = n
|
49
|
+
end
|
50
|
+
opts.on( "--add-random-keys N", Integer,
|
51
|
+
"Add N randomly selected test keys to the service" ) do |n|
|
52
|
+
options[ :add_random_keys ] = n
|
53
|
+
end
|
54
|
+
opts.on( "-v", "--version", "Display version and exit" ) do
|
55
|
+
puts "iudex-brutefuzzy-client: #{ Iudex::BruteFuzzy::Service::VERSION }"
|
56
|
+
exit 1
|
57
|
+
end
|
58
|
+
opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do
|
59
|
+
Logback[ 'iudex' ].level = Logback::DEBUG
|
60
|
+
end
|
61
|
+
Hooker.register_config( opts )
|
62
|
+
end.parse!
|
63
|
+
|
64
|
+
options
|
65
|
+
end
|
66
|
+
|
67
|
+
import 'iudex.brutefuzzy.service.Client'
|
68
|
+
import 'rjack.jms.JMSConnector'
|
69
|
+
|
70
|
+
BruteFuzzyUtil = Java::iudex.simhash.brutefuzzy.BruteFuzzy
|
71
|
+
|
72
|
+
def self.random_keys( n )
|
73
|
+
while n > 0
|
74
|
+
c = [ n, 1_000_000 ].min
|
75
|
+
yield BruteFuzzyUtil.testKeys( c, 3, rand( 999_999_999 ) )
|
76
|
+
n -= c
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# Read hex-encoded keys, one per line, from each named file and yield
# them to the caller's block in batches.
#
# files:: enumerable of file names, read in order
# bsize:: maximum number of keys per yielded batch (default 1_000)
#
# Each line is right-stripped and converted via
# BruteFuzzyUtil.unsignedHexToLong. The same Array is reused for every
# batch and cleared after each yield, so the block must consume (or
# copy) the batch before returning. A partial batch left at the end of
# a file is yielded and then cleared, so keys from one file are never
# re-sent together with keys of the next file.
def self.keys_from_files( files, bsize = 1_000 )
  buffer = []

  files.each do |fname|
    # File.open rather than Kernel#open: a name beginning with '|'
    # must be read as a file, never executed as a command.
    File.open( fname, 'r' ) do |fin|
      fin.each do |line|
        buffer << BruteFuzzyUtil.unsignedHexToLong( line.rstrip )
        if buffer.length >= bsize
          yield buffer
          buffer.clear
        end
      end
    end

    # Flush this file's remainder and reset before the next file.
    unless buffer.empty?
      yield buffer
      buffer.clear
    end
  end
end
|
96
|
+
|
97
|
+
def self.run
|
98
|
+
@log = SLF4J[ 'iudex.brutefuzzy.service.Client' ]
|
99
|
+
|
100
|
+
options = parse_options
|
101
|
+
@cnt = 0
|
102
|
+
|
103
|
+
if ( n = options[ :sample_random_keys ] )
|
104
|
+
random_keys( n ) { |keys| write_batch( keys ) }
|
105
|
+
else
|
106
|
+
with_client do
|
107
|
+
if ( n = options[ :add_random_keys ] )
|
108
|
+
random_keys( n ) { |keys| check_batch( keys ) }
|
109
|
+
elsif ! ARGV.empty?
|
110
|
+
keys_from_files( ARGV ) { |keys| check_batch( keys ) }
|
111
|
+
else # client (response listener) only
|
112
|
+
@client.check( 0, false ) # Hack to initialize
|
113
|
+
sleep # until interrupted
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def self.with_client
|
120
|
+
# Wire up JMS client
|
121
|
+
Hooker.with( :iudex ) do |h|
|
122
|
+
ctx = QpidClient::QpidJMSContext.new
|
123
|
+
Destinations.apply( ctx )
|
124
|
+
h.apply( :jms_context, ctx )
|
125
|
+
|
126
|
+
@connector = h.apply( :jms_connector, JMSConnector.new( ctx ) )
|
127
|
+
@connector.do_close_connections = false
|
128
|
+
@connector.start
|
129
|
+
|
130
|
+
@client = h.apply( :brutefuzzy_client, Client.new( @connector ) )
|
131
|
+
|
132
|
+
end
|
133
|
+
|
134
|
+
yield
|
135
|
+
ensure
|
136
|
+
if @client
|
137
|
+
@client.close
|
138
|
+
@client = nil
|
139
|
+
end
|
140
|
+
@connector.stop if @connector
|
141
|
+
end
|
142
|
+
|
143
|
+
def self.check_batch( keys )
|
144
|
+
keys.each do |k|
|
145
|
+
@client.check( k, true )
|
146
|
+
@cnt += 1
|
147
|
+
@log.info( "Sent: #{ @cnt }" ) if ( @cnt % 50_000 ) == 0
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
# Print each key of the batch to stdout as a 16-digit, zero-padded hex
# string, one per line, then add the batch size to the running @cnt.
def self.write_batch( keys )
  keys.each do |key|
    hex = Java::java.lang.Long::toHexString( key )
    puts hex.rjust( 16, '0' )
  end
  @cnt += keys.length
end
|
159
|
+
|
160
|
+
run
|
161
|
+
|
162
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.vm.options += -Xmx1g
|
3
|
+
# -*- ruby -*-
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2008-2012 David Kellum
|
7
|
+
#
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
9
|
+
# may not use this file except in compliance with the License. You
|
10
|
+
# may obtain a copy of the License at
|
11
|
+
#
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13
|
+
#
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
17
|
+
# implied. See the License for the specific language governing
|
18
|
+
# permissions and limitations under the License.
|
19
|
+
#++
|
20
|
+
|
21
|
+
$LOAD_PATH.unshift File.join( File.dirname(__FILE__), "..", "lib" )
|
22
|
+
|
23
|
+
module IudexBinScript
|
24
|
+
|
25
|
+
require 'rubygems'
|
26
|
+
require 'rjack-logback'
|
27
|
+
|
28
|
+
include RJack
|
29
|
+
Logback.config_console( :thread => true )
|
30
|
+
|
31
|
+
require 'iudex-brutefuzzy-service'
|
32
|
+
|
33
|
+
include Iudex
|
34
|
+
|
35
|
+
Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
|
36
|
+
|
37
|
+
OptionParser.new do |opts|
|
38
|
+
opts.on( "-v", "--version", "Display version" ) do
|
39
|
+
puts "iudex-brutefuzzy-service: #{ BruteFuzzy::Service::VERSION }"
|
40
|
+
exit 1
|
41
|
+
end
|
42
|
+
opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do
|
43
|
+
Logback[ 'iudex' ].level = Logback::DEBUG
|
44
|
+
end
|
45
|
+
Hooker.register_config( opts )
|
46
|
+
end.parse!
|
47
|
+
|
48
|
+
BruteFuzzy::Service::Agent.new.run
|
49
|
+
|
50
|
+
end
|
data/config/config.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Sample configuration for the iudex-brutefuzzy-service agent.
Iudex.configure do |c|

  c.setup_brutefuzzy_agent do |a|

    # Override the agent's fuzzy_set (the method Agent#run calls to
    # build the set handed to the Service) with a larger tree.
    def a.fuzzy_set
      Iudex::SimHash::BruteFuzzy::FuzzyTree64.new( 8 * 1024 * 1024, 3, 16 )
    end

  end

  c.with( :jms ) do |jc|

    jc.setup_context do |ctx|
      # ctx.brokers = [ [ "host-a" ], [ "host-b" ] ]
    end

    jc.setup_connector do |cntr|
      # cntr.max_connect_delay = 60_000 #ms
    end

  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
#. hashdot.profile += daemon
|
4
|
+
#. hashdot.pid_file = ./iudex-brutefuzzy-service.pid
|
5
|
+
#. hashdot.io_redirect.file = ./iudex-brutefuzzy-service.log
|
6
|
+
#. hashdot.vm.options += -Xmx2g
|
7
|
+
#. hashdot.vm.options += -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled
|
8
|
+
#. hashdot.vm.options += -XX:+UseCompressedOops
|
9
|
+
|
10
|
+
#--
|
11
|
+
# Copyright (c) 2008-2012 David Kellum
|
12
|
+
#
|
13
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
14
|
+
# may not use this file except in compliance with the License. You
|
15
|
+
# may obtain a copy of the License at
|
16
|
+
#
|
17
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
18
|
+
#
|
19
|
+
# Unless required by applicable law or agreed to in writing, software
|
20
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
21
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
22
|
+
# implied. See the License for the specific language governing
|
23
|
+
# permissions and limitations under the License.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require 'rubygems'
|
27
|
+
|
28
|
+
gem( "iudex-brutefuzzy-service", "= 1.2.b.0" )
|
29
|
+
|
30
|
+
module IudexInitScript
|
31
|
+
|
32
|
+
require 'rjack-logback'
|
33
|
+
include RJack
|
34
|
+
|
35
|
+
Logback.config_console( :full => true, :thread => true )
|
36
|
+
|
37
|
+
require 'iudex-brutefuzzy-service'
|
38
|
+
include Iudex
|
39
|
+
|
40
|
+
Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
|
41
|
+
|
42
|
+
if File.exist?( './config.rb' )
|
43
|
+
Hooker.load_file( './config.rb' )
|
44
|
+
end
|
45
|
+
|
46
|
+
BruteFuzzy::Service::Agent.new.run
|
47
|
+
|
48
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-core'
|
18
|
+
require 'iudex-simhash'
|
19
|
+
require 'iudex-brutefuzzy-protobuf'
|
20
|
+
|
21
|
+
require 'rjack-qpid-client'
|
22
|
+
|
23
|
+
require 'iudex-brutefuzzy-service/base.rb'
|
24
|
+
|
25
|
+
require 'java'
|
26
|
+
|
27
|
+
module Iudex
|
28
|
+
module BruteFuzzy
|
29
|
+
module Service
|
30
|
+
require "iudex-brutefuzzy-service/iudex-brutefuzzy-service-#{VERSION}.jar"
|
31
|
+
|
32
|
+
import 'iudex.brutefuzzy.service.Service'
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
require 'iudex-brutefuzzy-service/agent'
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-brutefuzzy-service'
|
18
|
+
require 'iudex-brutefuzzy-service/destinations'
|
19
|
+
|
20
|
+
require 'hooker'
|
21
|
+
|
22
|
+
module Iudex::BruteFuzzy::Service
|
23
|
+
|
24
|
+
class Agent
|
25
|
+
include Iudex::Core
|
26
|
+
include Iudex::BruteFuzzy
|
27
|
+
include Iudex::SimHash::BruteFuzzy
|
28
|
+
|
29
|
+
include RJack::QpidClient
|
30
|
+
|
31
|
+
import 'rjack.jms.JMSConnector'
|
32
|
+
|
33
|
+
def initialize
|
34
|
+
Hooker.apply( [ :iudex, :brutefuzzy_agent ], self )
|
35
|
+
end
|
36
|
+
|
37
|
+
def fuzzy_set
|
38
|
+
FuzzyTree64.new( 500_000, 3, 16 )
|
39
|
+
end
|
40
|
+
|
41
|
+
def jms_context
|
42
|
+
ctx = QpidJMSContext.new
|
43
|
+
Destinations.apply( ctx )
|
44
|
+
ctx
|
45
|
+
end
|
46
|
+
|
47
|
+
def jms_connector( ctx )
|
48
|
+
connector = JMSConnector.new( ctx )
|
49
|
+
connector.max_connect_delay = java.lang.Integer::MAX_VALUE
|
50
|
+
connector.do_close_connections = false
|
51
|
+
connector
|
52
|
+
end
|
53
|
+
|
54
|
+
def run
|
55
|
+
ctx = jms_context
|
56
|
+
Hooker.apply( [ :jms, :context ], ctx )
|
57
|
+
|
58
|
+
connector = jms_connector( ctx )
|
59
|
+
Hooker.apply( [ :jms, :connector ], connector )
|
60
|
+
|
61
|
+
service = Service.new( fuzzy_set )
|
62
|
+
Hooker.apply( [ :iudex, :brutefuzzy_service ], service )
|
63
|
+
|
64
|
+
Hooker.log_not_applied # All hooks should be used by now
|
65
|
+
|
66
|
+
connector.add_connect_listener( service )
|
67
|
+
connector.connect_loop
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
|
18
|
+
module BruteFuzzy
|
19
|
+
module Service
|
20
|
+
VERSION = '1.2.b.0'
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex::BruteFuzzy::Service
|
18
|
+
|
19
|
+
module Destinations
|
20
|
+
|
21
|
+
def self.apply( ctx )
|
22
|
+
|
23
|
+
ctx.destinations[ 'brutefuzzy-response-ex' ] = {
|
24
|
+
:assert => :sender,
|
25
|
+
:create => :sender,
|
26
|
+
:node => {
|
27
|
+
:type => :topic,
|
28
|
+
'x-declare' => {
|
29
|
+
:type => :fanout,
|
30
|
+
}
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
# Direct request writes are needed for querying depth
|
35
|
+
# (explicit flow control). Thus no exchange here.
|
36
|
+
ctx.destinations[ 'brutefuzzy-request' ] = {
|
37
|
+
:assert => :always,
|
38
|
+
:create => :always,
|
39
|
+
:node => {
|
40
|
+
:type => :queue,
|
41
|
+
'x-declare' => {
|
42
|
+
:arguments => {
|
43
|
+
'qpid.max_size' => 500_000,
|
44
|
+
'qpid.policy_type' => :reject,
|
45
|
+
}
|
46
|
+
}
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
ctx.destinations[ 'brutefuzzy-client' ] = {
|
51
|
+
:address => ctx.address_per_process( 'brutefuzzy-client' ),
|
52
|
+
:assert => :receiver,
|
53
|
+
:create => :receiver,
|
54
|
+
:delete => :receiver,
|
55
|
+
:node => {
|
56
|
+
:type => :queue,
|
57
|
+
'x-bindings' => [ { :exchange => 'brutefuzzy-response-ex' } ],
|
58
|
+
'x-declare' => {
|
59
|
+
'auto-delete' => true,
|
60
|
+
:arguments => {
|
61
|
+
'qpid.max_size' => 500_000,
|
62
|
+
'qpid.policy_type' => :ring,
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
Binary file
|
data/pom.xml
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
2
|
+
<modelVersion>4.0.0</modelVersion>
|
3
|
+
<groupId>iudex</groupId>
|
4
|
+
<artifactId>iudex-brutefuzzy-service</artifactId>
|
5
|
+
<packaging>jar</packaging>
|
6
|
+
<version>1.2.b.0</version>
|
7
|
+
<name>Iudex BruteFuzzy Service</name>
|
8
|
+
|
9
|
+
<parent>
|
10
|
+
<groupId>iudex</groupId>
|
11
|
+
<artifactId>iudex-parent</artifactId>
|
12
|
+
<version>1.2.b.0</version>
|
13
|
+
<relativePath>..</relativePath>
|
14
|
+
</parent>
|
15
|
+
|
16
|
+
<dependencies>
|
17
|
+
|
18
|
+
<dependency>
|
19
|
+
<groupId>iudex</groupId>
|
20
|
+
<artifactId>iudex-core</artifactId>
|
21
|
+
<version>[1.2,1.2.9999)</version>
|
22
|
+
</dependency>
|
23
|
+
|
24
|
+
<dependency>
|
25
|
+
<groupId>iudex</groupId>
|
26
|
+
<artifactId>iudex-brutefuzzy-protobuf</artifactId>
|
27
|
+
<version>[1.2,1.2.9999)</version>
|
28
|
+
</dependency>
|
29
|
+
|
30
|
+
<dependency>
|
31
|
+
<groupId>iudex</groupId>
|
32
|
+
<artifactId>iudex-simhash</artifactId>
|
33
|
+
<version>[1.2,1.2.9999)</version>
|
34
|
+
</dependency>
|
35
|
+
|
36
|
+
<dependency>
|
37
|
+
<groupId>rjack</groupId>
|
38
|
+
<artifactId>rjack-jms</artifactId>
|
39
|
+
<version>[1.1,1.2)</version>
|
40
|
+
</dependency>
|
41
|
+
|
42
|
+
<dependency>
|
43
|
+
<groupId>org.apache.qpid</groupId>
|
44
|
+
<artifactId>qpid-client</artifactId>
|
45
|
+
<version>0.14</version>
|
46
|
+
</dependency>
|
47
|
+
|
48
|
+
</dependencies>
|
49
|
+
|
50
|
+
<build>
|
51
|
+
<plugins>
|
52
|
+
<plugin>
|
53
|
+
<!-- Parent settings -->
|
54
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
55
|
+
</plugin>
|
56
|
+
<plugin>
|
57
|
+
<!-- Parent settings -->
|
58
|
+
<artifactId>maven-source-plugin</artifactId>
|
59
|
+
</plugin>
|
60
|
+
</plugins>
|
61
|
+
</build>
|
62
|
+
|
63
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup, logging, console output ####
|
18
|
+
|
19
|
+
require 'rubygems'
|
20
|
+
require 'bundler/setup'
|
21
|
+
|
22
|
+
require 'minitest/unit'
|
23
|
+
require 'minitest/autorun'
|
24
|
+
|
25
|
+
require 'rjack-logback'
|
26
|
+
|
27
|
+
module TestSetup
|
28
|
+
include RJack
|
29
|
+
Logback.config_console( :stderr => true, :thread => true )
|
30
|
+
|
31
|
+
if ( ARGV & %w[ -v --verbose --debug ] ).empty?
|
32
|
+
# Make test output logging compatible: no partial lines.
|
33
|
+
class TestOut
|
34
|
+
def print( *a ); $stdout.puts( *a ); end
|
35
|
+
def puts( *a ); $stdout.puts( *a ); end
|
36
|
+
end
|
37
|
+
MiniTest::Unit.output = TestOut.new
|
38
|
+
else
|
39
|
+
Logback.root.level = Logback::DEBUG
|
40
|
+
end
|
41
|
+
|
42
|
+
ARGV.delete( '--debug' )
|
43
|
+
|
44
|
+
end
|
data/test/test_agent.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You may
|
9
|
+
# obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-brutefuzzy-service'
|
23
|
+
|
24
|
+
class TestAgent < MiniTest::Unit::TestCase
|
25
|
+
include Iudex::BruteFuzzy::Service
|
26
|
+
|
27
|
+
def teardown
|
28
|
+
Hooker.send( :clear )
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_agent_default
|
32
|
+
assert_agent
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_agent_with_sample_config
|
36
|
+
|
37
|
+
# Test out the sample config
|
38
|
+
Hooker.load_file( File.join( File.dirname( __FILE__ ),
|
39
|
+
'..', 'config', 'config.rb' ) )
|
40
|
+
assert_agent
|
41
|
+
end
|
42
|
+
|
43
|
+
# Shared assertion: build an Agent whose connector has connect_loop
# stubbed out, run it, and fail if any registered Hooker hook was
# left unapplied.
def assert_agent

  agent = Agent.new

  # Wrap the real jms_connector so the connector it returns answers
  # connect_loop with a sentinel rather than running the real loop.
  def agent.jms_connector( ctx )
    c = super
    def c.connect_loop
      :test_run_return
    end
    c
  end

  # minitest's signature is assert_equal( expected, actual ); keeping
  # that order makes failure messages read correctly.
  assert_equal( :test_run_return, agent.run )

  Hooker.check_not_applied do |*args|
    flunk( "Hooks not applied: " + args.inspect )
  end
  pass

end
|
63
|
+
|
64
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You may
|
9
|
+
# obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-brutefuzzy-service'
|
23
|
+
require 'iudex-brutefuzzy-service/destinations'
|
24
|
+
|
25
|
+
class TestQpidContext < MiniTest::Unit::TestCase
|
26
|
+
include RJack::QpidClient
|
27
|
+
include Iudex::BruteFuzzy::Service
|
28
|
+
|
29
|
+
def test_destinations
|
30
|
+
con = nil
|
31
|
+
ctx = QpidJMSContext.new
|
32
|
+
Destinations.apply( ctx )
|
33
|
+
|
34
|
+
assert( con = ctx.create_connection )
|
35
|
+
assert( ctx.lookup_destination( 'brutefuzzy-request' ) )
|
36
|
+
assert( ctx.lookup_destination( 'brutefuzzy-response-ex' ) )
|
37
|
+
assert( ctx.lookup_destination( 'brutefuzzy-client' ) )
|
38
|
+
ensure
|
39
|
+
ctx.close if ctx
|
40
|
+
con.close if con
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iudex-brutefuzzy-service
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: 4
|
5
|
+
version: 1.2.b.0
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- David Kellum
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2012-03-05 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: iudex-core
|
17
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.2.b
|
23
|
+
requirement: *id001
|
24
|
+
prerelease: false
|
25
|
+
type: :runtime
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rjack-logback
|
28
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "1.0"
|
34
|
+
requirement: *id002
|
35
|
+
prerelease: false
|
36
|
+
type: :runtime
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: iudex-simhash
|
39
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 1.2.b
|
45
|
+
requirement: *id003
|
46
|
+
prerelease: false
|
47
|
+
type: :runtime
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: iudex-brutefuzzy-protobuf
|
50
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.2.b
|
56
|
+
requirement: *id004
|
57
|
+
prerelease: false
|
58
|
+
type: :runtime
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: rjack-jms
|
61
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ~>
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 1.1.0
|
67
|
+
requirement: *id005
|
68
|
+
prerelease: false
|
69
|
+
type: :runtime
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rjack-qpid-client
|
72
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.14.0
|
78
|
+
requirement: *id006
|
79
|
+
prerelease: false
|
80
|
+
type: :runtime
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: minitest
|
83
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ~>
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: "2.3"
|
89
|
+
requirement: *id007
|
90
|
+
prerelease: false
|
91
|
+
type: :development
|
92
|
+
- !ruby/object:Gem::Dependency
|
93
|
+
name: rjack-tarpit
|
94
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ~>
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "2.0"
|
100
|
+
requirement: *id008
|
101
|
+
prerelease: false
|
102
|
+
type: :development
|
103
|
+
description: Iudex is a general purpose web crawler and feed processor in ruby/java. The iudex-brutefuzzy-service provides a fuzzy simhash lookup index as a distributed service.
|
104
|
+
email:
|
105
|
+
- dek-oss@gravitext.com
|
106
|
+
executables:
|
107
|
+
- iudex-brutefuzzy-client
|
108
|
+
- iudex-brutefuzzy-service-fg
|
109
|
+
extensions: []
|
110
|
+
|
111
|
+
extra_rdoc_files:
|
112
|
+
- History.rdoc
|
113
|
+
- README.rdoc
|
114
|
+
files:
|
115
|
+
- History.rdoc
|
116
|
+
- Manifest.txt
|
117
|
+
- README.rdoc
|
118
|
+
- Rakefile
|
119
|
+
- pom.xml
|
120
|
+
- bin/iudex-brutefuzzy-client
|
121
|
+
- bin/iudex-brutefuzzy-service-fg
|
122
|
+
- config/config.rb
|
123
|
+
- init/iudex-brutefuzzy-service
|
124
|
+
- lib/iudex-brutefuzzy-service/base.rb
|
125
|
+
- lib/iudex-brutefuzzy-service.rb
|
126
|
+
- lib/iudex-brutefuzzy-service/agent.rb
|
127
|
+
- lib/iudex-brutefuzzy-service/destinations.rb
|
128
|
+
- test/setup.rb
|
129
|
+
- test/test_agent.rb
|
130
|
+
- test/test_qpid_context.rb
|
131
|
+
- lib/iudex-brutefuzzy-service/iudex-brutefuzzy-service-1.2.b.0.jar
|
132
|
+
homepage: http://github.com/dekellum/iudex
|
133
|
+
licenses: []
|
134
|
+
|
135
|
+
post_install_message:
|
136
|
+
rdoc_options:
|
137
|
+
- --main
|
138
|
+
- README.rdoc
|
139
|
+
require_paths:
|
140
|
+
- lib
|
141
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
142
|
+
none: false
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
hash: 2
|
147
|
+
segments:
|
148
|
+
- 0
|
149
|
+
version: "0"
|
150
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
|
+
none: false
|
152
|
+
requirements:
|
153
|
+
- - ">"
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: 1.3.1
|
156
|
+
requirements: []
|
157
|
+
|
158
|
+
rubyforge_project:
|
159
|
+
rubygems_version: 1.8.15
|
160
|
+
signing_key:
|
161
|
+
specification_version: 3
|
162
|
+
summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
|
163
|
+
test_files: []
|
164
|
+
|