code_zauker 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/bin/czindexer +23 -1
- data/bin/czsearch +19 -3
- data/lib/code_zauker/cli.rb +40 -0
- data/lib/code_zauker/constants.rb +3 -1
- data/lib/code_zauker/version.rb +1 -1
- data/lib/code_zauker/webgui.rb +18 -1
- data/readme.org +26 -10
- data/templates/show_results.erb +8 -3
- metadata +16 -15
data/.gitignore
CHANGED
data/bin/czindexer
CHANGED
@@ -2,11 +2,18 @@
|
|
2
2
|
# Suggested execution is mixing find / xargs with the parallel (P) parameters:
|
3
3
|
# find test/fixture/ -type f | xargs -P 5 -n 10 ./bin/czindexer
|
4
4
|
# will fire 5 czindexer each with 10 files to process...
|
5
|
+
require 'code_zauker/cli'
|
6
|
+
require 'redis/connection/hiredis'
|
7
|
+
require 'redis'
|
5
8
|
require 'optparse'
|
6
9
|
options={}
|
7
10
|
optparse= OptionParser.new do |opts|
|
8
11
|
opts.banner="Usage: czindexer [options] [file1] [file2]..."
|
9
12
|
options[:verbose] = false
|
13
|
+
options[:redis_host]="127.0.0.1"
|
14
|
+
options[:redis_port]=6379
|
15
|
+
options[:redis_password]=nil
|
16
|
+
|
10
17
|
|
11
18
|
opts.on( '-v', '--verbose', 'Output more information' ) do
|
12
19
|
options[:verbose] = true
|
@@ -17,6 +24,20 @@ optparse= OptionParser.new do |opts|
|
|
17
24
|
options[:reindex]=true
|
18
25
|
end
|
19
26
|
|
27
|
+
opts.on('-h','--redis-server pass@SERVER:port', String,
|
28
|
+
'Specify the alternate redis server to use')do |server|
|
29
|
+
myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
|
30
|
+
options[:redis_host]=myoptions[:redis_host]
|
31
|
+
options[:redis_port]=myoptions[:redis_port]
|
32
|
+
options[:redis_password]=myoptions[:redis_password]
|
33
|
+
|
34
|
+
if options[:redis_password]
|
35
|
+
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
|
36
|
+
else
|
37
|
+
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
20
41
|
opts.on( '-h', '--help', 'Display this screen' ) do
|
21
42
|
puts opts
|
22
43
|
exit
|
@@ -82,7 +103,8 @@ end
|
|
82
103
|
begin
|
83
104
|
$CUMULATED_TIME=0
|
84
105
|
# Allocated here to recycle connection
|
85
|
-
|
106
|
+
redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
|
107
|
+
fs=CodeZauker::FileScanner.new(redisConnection)
|
86
108
|
$PROCESSED_FILES=0
|
87
109
|
puts "Code Zauker v#{CodeZauker::VERSION}" if options[:verbose]
|
88
110
|
puts "Reindexing..." if options[:verbose]==true and options[:reindex]==true
|
data/bin/czsearch
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
$VERBOSE=nil
|
5
5
|
require 'code_zauker'
|
6
6
|
require 'code_zauker/grep'
|
7
|
+
require 'code_zauker/cli'
|
8
|
+
require 'redis/connection/hiredis'
|
9
|
+
require 'redis'
|
7
10
|
require 'tempfile'
|
8
11
|
require 'pdf/reader'
|
9
12
|
include Grep
|
@@ -17,6 +20,9 @@ optparse= OptionParser.new do |opts|
|
|
17
20
|
options[:postcontext]=0
|
18
21
|
options[:extensions_to_ignore]=[]
|
19
22
|
options[:file_to_exclude]=[]
|
23
|
+
options[:redis_host]="127.0.0.1"
|
24
|
+
options[:redis_port]=6379
|
25
|
+
options[:redis_password]=nil
|
20
26
|
|
21
27
|
opts.on('-i', '--ignore-case','ignore case distinctions') do
|
22
28
|
options[:ignorecase]=true
|
@@ -44,9 +50,18 @@ optparse= OptionParser.new do |opts|
|
|
44
50
|
end
|
45
51
|
|
46
52
|
|
47
|
-
opts.on('-h','--redis-server SERVER', String,
|
53
|
+
opts.on('-h','--redis-server pass@SERVER:port', String,
|
48
54
|
'Specify the alternate redis server to use')do |server|
|
49
|
-
|
55
|
+
myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
|
56
|
+
options[:redis_host]=myoptions[:redis_host]
|
57
|
+
options[:redis_port]=myoptions[:redis_port]
|
58
|
+
options[:redis_password]=myoptions[:redis_password]
|
59
|
+
|
60
|
+
if options[:redis_password]
|
61
|
+
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
|
62
|
+
else
|
63
|
+
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
|
64
|
+
end
|
50
65
|
end
|
51
66
|
|
52
67
|
|
@@ -69,7 +84,8 @@ optparse.parse!
|
|
69
84
|
ARGV.each do | s |
|
70
85
|
#puts "Code Zauker Searching for #{s}"
|
71
86
|
util=CodeZauker::Util.new()
|
72
|
-
|
87
|
+
redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
|
88
|
+
fs=CodeZauker::FileScanner.new(redisConnection)
|
73
89
|
if options[:ignorecase]==false
|
74
90
|
files=fs.search(s)
|
75
91
|
pattern=/#{Regexp.escape(s)}/
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module CodeZauker
|
2
|
+
|
3
|
+
class CliUtil
|
4
|
+
|
5
|
+
def parse_host_options(connection_string)
|
6
|
+
#puts "Parsing... #{connection_string}"
|
7
|
+
options={}
|
8
|
+
options[:redis_host]="127.0.0.1"
|
9
|
+
options[:redis_port]=6379
|
10
|
+
options[:redis_password]=nil
|
11
|
+
r=/(\w+)@([a-zA-Z0-9.]+):([0-9]+)?/
|
12
|
+
rNoPass=/([a-zA-Z0-9.]+):([0-9]+)?/
|
13
|
+
rHostAndPass=/(\w+)@([a-zA-Z0-9.]+)/
|
14
|
+
m=r.match(connection_string)
|
15
|
+
if m
|
16
|
+
options[:redis_password]=m.captures[0]
|
17
|
+
options[:redis_host]=m.captures[1]
|
18
|
+
options[:redis_port]=m.captures[2]
|
19
|
+
|
20
|
+
else
|
21
|
+
m=rNoPass.match(connection_string)
|
22
|
+
if m
|
23
|
+
options[:redis_host]=m.captures[0]
|
24
|
+
options[:redis_port]=m.captures[1]
|
25
|
+
else
|
26
|
+
# Check the auth@host case right here
|
27
|
+
m2=rHostAndPass.match(connection_string)
|
28
|
+
if m2
|
29
|
+
options[:redis_password]=m2.captures[0]
|
30
|
+
options[:redis_host]=m2.captures[1]
|
31
|
+
else
|
32
|
+
#puts "SERVER ONLY"
|
33
|
+
options[:redis_host]=connection_string
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
return options
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
module CodeZauker
|
2
|
-
|
2
|
+
# Under Amazon AWS, a lot of timeouts can happen.
|
3
|
+
# We put a higher retry count here
|
4
|
+
MAX_PUSH_TRIGRAM_RETRIES=15
|
3
5
|
# Stats
|
4
6
|
# It is difficult to decide what is the best trigram push size.
|
5
7
|
# a larger one ensure a best in memory processing but can lead to longer transactions
|
data/lib/code_zauker/version.rb
CHANGED
data/lib/code_zauker/webgui.rb
CHANGED
@@ -3,6 +3,8 @@ require 'sinatra'
|
|
3
3
|
require "code_zauker/version"
|
4
4
|
require "code_zauker"
|
5
5
|
require "erb"
|
6
|
+
require 'code_zauker/grep'
|
7
|
+
include Grep
|
6
8
|
|
7
9
|
# See http://www.sinatrarb.com/intro
|
8
10
|
get '/' do
|
@@ -14,7 +16,22 @@ get '/search' do
|
|
14
16
|
# Process the search and show the results...
|
15
17
|
fs=CodeZauker::FileScanner.new()
|
16
18
|
files=fs.isearch(params[:q])
|
17
|
-
|
19
|
+
util=CodeZauker::Util.new()
|
20
|
+
abstracts=[]
|
21
|
+
files.each do |f|
|
22
|
+
if util.is_pdf?(f)==false
|
23
|
+
askedQuery=params[:q]
|
24
|
+
pattern=/#{Regexp.escape(askedQuery)}/i
|
25
|
+
lines=grep(f,pattern, pre_context=2, post_context=2);
|
26
|
+
desc=""
|
27
|
+
lines.each do |l |
|
28
|
+
hilighted=l.gsub(/(#{Regexp.escape(askedQuery)})/i){ "<b>#{$1}</b>"}
|
29
|
+
desc=desc+ "#{f}:#{hilighted}\n"
|
30
|
+
end
|
31
|
+
abstracts.push(desc)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
erb :show_results, :locals => {:files => abstracts, :q => params[:q] }
|
18
35
|
end
|
19
36
|
|
20
37
|
configure do
|
data/readme.org
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
* Code Zauker: your code, indexed
|
2
2
|
Code Zauker is a search engine for programming languages.
|
3
|
-
Code Zauker is based from ideas taken by old Google Code Search and uses Redis as
|
3
|
+
Code Zauker is based on ideas taken from the old Google Code Search and uses Redis as its storage engine.
|
4
4
|
|
5
5
|
For news and discussion: http://gioorgi.com/tag/code-zauker/
|
6
6
|
|
@@ -13,17 +13,33 @@ To install Code Zauker,simply issue
|
|
13
13
|
You need also [[http://redis.io/][redis-2.4.6]] or better.
|
14
14
|
For a sample redis configuration see the etc/ directory of the project
|
15
15
|
|
16
|
+
* Try it out
|
17
|
+
Take a look at the help of the czindexer and czsearch commands for usage patterns.
|
18
|
+
|
19
|
+
|
16
20
|
|
17
21
|
* Release History
|
18
|
-
| Version | Date | Summary
|
19
|
-
|
20
|
-
| 0.0.
|
21
|
-
| 0.0.
|
22
|
-
| 0.0.
|
23
|
-
| 0.0.
|
24
|
-
| 0.0.
|
25
|
-
|
|
26
|
-
| | |
|
22
|
+
| Version | Date | Summary |
|
23
|
+
|---------+-------------+-----------------------------------------------------------------|
|
24
|
+
| 0.0.6 | 04 May 2012 | New redis-server option. Better web search with result highlighting |
|
25
|
+
| 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootstrap css |
|
26
|
+
| 0.0.4 | 12 Feb 2012 | PDF Searching |
|
27
|
+
| 0.0.3 | 03 Feb 2012 | Added case-insensitive search. UTF-8 trigram database |
|
28
|
+
| 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
|
29
|
+
| 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
|
30
|
+
| | | |
|
31
|
+
| | | |
|
32
|
+
|
33
|
+
* Aws tests
|
34
|
+
** Micro instance
|
35
|
+
Without multiplexing you get
|
36
|
+
real 4m39.599s
|
37
|
+
for indexing code_zauker
|
38
|
+
|
39
|
+
With
|
40
|
+
time find . -type f -print0 | xargs -0 -P 10 -n 20 ./bin/czindexer -v --redis-server awsserver
|
41
|
+
You get about
|
42
|
+
real 0m31.284s
|
27
43
|
|
28
44
|
|
29
45
|
* DEVELOPING
|
data/templates/show_results.erb
CHANGED
@@ -32,15 +32,20 @@
|
|
32
32
|
|
33
33
|
</form>
|
34
34
|
</p>
|
35
|
+
<style>
|
36
|
+
b{
|
37
|
+
color: #A1A100;
|
38
|
+
}
|
39
|
+
</style>
|
35
40
|
<span class="badge badge-success"> <%=files.length%> Results</span>
|
36
41
|
<ol>
|
37
42
|
<%
|
38
43
|
files.each do |f|
|
39
|
-
|
44
|
+
%>
|
40
45
|
<li><%=f%>
|
41
|
-
|
46
|
+
<%
|
42
47
|
end
|
43
|
-
|
48
|
+
%>
|
44
49
|
</ol>
|
45
50
|
</div>
|
46
51
|
</div>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: code_zauker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04
|
12
|
+
date: 2012-05-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yard
|
16
|
-
requirement: &
|
16
|
+
requirement: &74198570 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.7'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *74198570
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rubyzip
|
27
|
-
requirement: &
|
27
|
+
requirement: &74197690 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.9'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *74197690
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hiredis
|
38
|
-
requirement: &
|
38
|
+
requirement: &74196880 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.3'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *74196880
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: redis
|
49
|
-
requirement: &
|
49
|
+
requirement: &74196090 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '2.2'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *74196090
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: pdf-reader
|
60
|
-
requirement: &
|
60
|
+
requirement: &74187710 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.0
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *74187710
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: sinatra
|
71
|
-
requirement: &
|
71
|
+
requirement: &74186780 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '1.3'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *74186780
|
80
80
|
description: Code Zauker is based from ideas taken by old Google Code Search and uses
|
81
81
|
Redis as a basic platform
|
82
82
|
email:
|
@@ -129,6 +129,7 @@ files:
|
|
129
129
|
- htdocs/js/bootstrap.js
|
130
130
|
- htdocs/js/bootstrap.min.js
|
131
131
|
- lib/code_zauker.rb
|
132
|
+
- lib/code_zauker/cli.rb
|
132
133
|
- lib/code_zauker/constants.rb
|
133
134
|
- lib/code_zauker/grep.rb
|
134
135
|
- lib/code_zauker/version.rb
|
@@ -163,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
164
|
version: '0'
|
164
165
|
requirements: []
|
165
166
|
rubyforge_project: code_zauker
|
166
|
-
rubygems_version: 1.8.
|
167
|
+
rubygems_version: 1.8.11
|
167
168
|
signing_key:
|
168
169
|
specification_version: 3
|
169
170
|
summary: A search engine for programming languages
|