code_zauker 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -0
- data/BUGS.org +6 -0
- data/Gemfile +0 -0
- data/LICENSE.txt +0 -0
- data/Rakefile +0 -0
- data/bin/czindexer +2 -2
- data/bin/czlist +122 -0
- data/bin/czsearch +2 -164
- data/bin/mczindexer +0 -0
- data/bin/report.rb +0 -1
- data/bin/startRedis +0 -0
- data/bin/webgui +0 -0
- data/code_zauker.gemspec +1 -1
- data/devel.org +50 -0
- data/doc/CodeZauker.html +0 -0
- data/doc/CodeZauker/CliUtil.html +0 -0
- data/doc/CodeZauker/FileScanner.html +0 -0
- data/doc/CodeZauker/IndexManager.html +0 -0
- data/doc/CodeZauker/Util.html +0 -0
- data/doc/Grep.html +0 -0
- data/doc/_index.html +0 -0
- data/doc/class_list.html +0 -0
- data/doc/css/common.css +0 -0
- data/doc/css/full_list.css +0 -0
- data/doc/css/style.css +0 -0
- data/doc/file_list.html +0 -0
- data/doc/frames.html +0 -0
- data/doc/index.html +0 -0
- data/doc/js/app.js +0 -0
- data/doc/js/full_list.js +0 -0
- data/doc/js/jquery.js +0 -0
- data/doc/method_list.html +0 -0
- data/doc/top-level-namespace.html +0 -0
- data/etc/redis-win.conf +2 -2
- data/etc/redis.conf +0 -0
- data/htdocs/CodeZauker.gif +0 -0
- data/htdocs/Gioorgi.gif +0 -0
- data/htdocs/css/bootstrap-responsive.css +0 -0
- data/htdocs/css/bootstrap-responsive.min.css +0 -0
- data/htdocs/css/bootstrap.css +0 -0
- data/htdocs/css/bootstrap.min.css +0 -0
- data/htdocs/img/glyphicons-halflings-white.png +0 -0
- data/htdocs/img/glyphicons-halflings.png +0 -0
- data/htdocs/js/bootstrap.js +0 -0
- data/htdocs/js/bootstrap.min.js +0 -0
- data/lib/code_zauker.rb +29 -13
- data/lib/code_zauker/cli.rb +3 -3
- data/lib/code_zauker/constants.rb +2 -2
- data/lib/code_zauker/grep.rb +0 -0
- data/lib/code_zauker/version.rb +1 -1
- data/lib/code_zauker/webgui.rb +0 -0
- data/readme.org +26 -4
- data/templates/search.erb +0 -0
- data/templates/show_results.erb +0 -0
- data/test/fixture/TEST_LICENSE.txt +0 -0
- data/test/fixture/foolish.txt +0 -0
- data/test/fixture/kurukku.txt +0 -0
- data/test/fixture/simple_test.pdf +0 -0
- data/test/fixture/testArchive.zip +0 -0
- data/test/fixture/wildtest.txt +0 -0
- data/test/test_pdf_indexing.rb +0 -0
- data/test/test_search.rb +7 -7
- data/test/test_wild_search.rb +0 -0
- metadata +43 -22
data/.gitignore
CHANGED
File without changes
|
data/BUGS.org
CHANGED
@@ -2,3 +2,9 @@
|
|
2
2
|
* Bug 001 :wontfix_soon:
|
3
3
|
Indexing a 700Kb gem takes too much time; it seems to loop
|
4
4
|
Avoid indexing gem files for the time being.
|
5
|
+
* Bug 002 :limitation:
|
6
|
+
Reindexing does not work very well. Code Zauker will not automatically detect that a file has changed.
|
7
|
+
Implement MD5 checksum support to reindex changed files quickly
|
8
|
+
|
9
|
+
* Bug 003 :low:
|
10
|
+
Avoid the keys() command because it is discouraged for normal usage scenarios
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE.txt
CHANGED
File without changes
|
data/Rakefile
CHANGED
File without changes
|
data/bin/czindexer
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# find test/fixture/ -type f | xargs -P 5 -n 10 ./bin/czindexer
|
4
4
|
# will fire 5 czindexer each with 10 files to process...
|
5
5
|
require 'code_zauker/cli'
|
6
|
-
require 'redis/connection/hiredis'
|
6
|
+
#require 'redis/connection/hiredis'
|
7
7
|
require 'redis'
|
8
8
|
require 'optparse'
|
9
9
|
options={}
|
@@ -86,7 +86,7 @@ def processElement(l,fs,options)
|
|
86
86
|
if options[:reindex] == true
|
87
87
|
fs.reindex([l])
|
88
88
|
else
|
89
|
-
fs.load(l
|
89
|
+
fs.load(l)
|
90
90
|
end
|
91
91
|
timeTaken=Time.now-startTime
|
92
92
|
$PROCESSED_FILES+=1
|
data/bin/czlist
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#== czlist list only the files which can contain the required string
|
3
|
+
# should be combined with "xargs grep/egrep"
|
4
|
+
# Ideal for ide integration
|
5
|
+
# Simpler than czsearch
|
6
|
+
$VERBOSE=nil
|
7
|
+
require 'code_zauker'
|
8
|
+
require 'code_zauker/cli'
|
9
|
+
#require 'redis/connection/hiredis'
|
10
|
+
require 'redis'
|
11
|
+
#include Grep
|
12
|
+
|
13
|
+
require 'optparse'
|
14
|
+
options={}
|
15
|
+
optparse= OptionParser.new do |opts|
|
16
|
+
opts.banner="Usage: czlist [options] [term1] [term2]..."
|
17
|
+
options[:extensions_to_ignore]=[]
|
18
|
+
options[:file_to_exclude]=[]
|
19
|
+
options[:redis_host]="127.0.0.1"
|
20
|
+
options[:redis_port]=6379
|
21
|
+
options[:redis_password]=nil
|
22
|
+
options[:be_wild]=true
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
opts.on('-X','--exclude FILE_PATTERN',String,
|
27
|
+
'Exclude files that match FILE_PATTERN (as ruby regexp). Case insensitive') do |p|
|
28
|
+
options[:file_to_exclude].push(/#{Regexp.escape(p)}/i);
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.on('-w','--wild','Do a wildcharacter search. * means "every char". True by default') do
|
32
|
+
options[:be_wild] = true
|
33
|
+
options[:ignorecase]=true
|
34
|
+
end
|
35
|
+
|
36
|
+
opts.on('-e','--exact','Disable wild search Userful if you need to search * or exact matches ') do
|
37
|
+
options[:be_wild] = false
|
38
|
+
options[:ignorecase]=true
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
opts.on('--redis-server pass@SERVER:port', String,
|
43
|
+
'Specify the alternate redis server to use')do |server|
|
44
|
+
myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
|
45
|
+
options[:redis_host]=myoptions[:redis_host]
|
46
|
+
options[:redis_port]=myoptions[:redis_port]
|
47
|
+
options[:redis_password]=myoptions[:redis_password]
|
48
|
+
|
49
|
+
if options[:redis_password]
|
50
|
+
#puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
|
51
|
+
else
|
52
|
+
#puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
opts.on( '-h', '--help', 'Display this screen' ) do
|
59
|
+
puts opts
|
60
|
+
puts "EXAMPLES:"
|
61
|
+
puts "czlist for"
|
62
|
+
puts " Will search for loops and return the file smatching it"
|
63
|
+
puts "czlist -w 'public*class School'"
|
64
|
+
puts " Will seach for a java class called School ignoring characters between public and class."
|
65
|
+
puts "czlist for | xargs grep for"
|
66
|
+
puts " will be quite the same of czsearch but faster."
|
67
|
+
puts "Search is always case insensitive and wild by default"
|
68
|
+
exit
|
69
|
+
end
|
70
|
+
end
|
71
|
+
optparse.parse!
|
72
|
+
|
73
|
+
ARGV.each do | s |
|
74
|
+
#puts "Code Zauker Searching for #{s}"
|
75
|
+
util=CodeZauker::Util.new()
|
76
|
+
redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
|
77
|
+
fs=CodeZauker::FileScanner.new(redisConnection)
|
78
|
+
|
79
|
+
if options[:be_wild]==true
|
80
|
+
cli=CodeZauker::CliUtil.new()
|
81
|
+
r=cli.doWildSearch(s,fs)
|
82
|
+
files= r[:files]
|
83
|
+
pattern=r[:regexp]
|
84
|
+
else
|
85
|
+
# It uses always isearch
|
86
|
+
# and delegates to the grep subsystem to find it out
|
87
|
+
files=fs.isearch(s)
|
88
|
+
end
|
89
|
+
|
90
|
+
files.each do |f|
|
91
|
+
to_exclude=false
|
92
|
+
if options[:file_to_exclude].length >0
|
93
|
+
# Will match?
|
94
|
+
to_exclude=false
|
95
|
+
options[:file_to_exclude].each do |pattern|
|
96
|
+
#puts "\n\t#{f} =~ #{pattern}"
|
97
|
+
if (f =~ pattern )
|
98
|
+
to_exclude=true
|
99
|
+
#puts "Excluded #{f}"
|
100
|
+
break
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# Does it exist?
|
106
|
+
if !to_exclude && !File.exists?(f)
|
107
|
+
#puts "WARN: Not FOUND #{f}"
|
108
|
+
to_exclude=true
|
109
|
+
end
|
110
|
+
|
111
|
+
if !to_exclude
|
112
|
+
begin
|
113
|
+
puts "#{f}"
|
114
|
+
rescue ArgumentError => ioe
|
115
|
+
puts "FATAL ArgumentError on #{f}"
|
116
|
+
raise ioe
|
117
|
+
end
|
118
|
+
else
|
119
|
+
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
data/bin/czsearch
CHANGED
@@ -1,164 +1,2 @@
|
|
1
|
-
#!/
|
2
|
-
|
3
|
-
# Send something like -W0 to ruby, for a cleaner output
|
4
|
-
$VERBOSE=nil
|
5
|
-
require 'code_zauker'
|
6
|
-
require 'code_zauker/grep'
|
7
|
-
require 'code_zauker/cli'
|
8
|
-
require 'redis/connection/hiredis'
|
9
|
-
require 'redis'
|
10
|
-
require 'tempfile'
|
11
|
-
require 'pdf/reader'
|
12
|
-
include Grep
|
13
|
-
|
14
|
-
require 'optparse'
|
15
|
-
options={}
|
16
|
-
optparse= OptionParser.new do |opts|
|
17
|
-
opts.banner="Usage: czsearch [options] [term1] [term2]..."
|
18
|
-
options[:ignorecase]=false
|
19
|
-
options[:precontext]=0
|
20
|
-
options[:postcontext]=0
|
21
|
-
options[:extensions_to_ignore]=[]
|
22
|
-
options[:file_to_exclude]=[]
|
23
|
-
options[:redis_host]="127.0.0.1"
|
24
|
-
options[:redis_port]=6379
|
25
|
-
options[:redis_password]=nil
|
26
|
-
options[:be_wild]=false
|
27
|
-
|
28
|
-
opts.on('-i', '--ignore-case','ignore case distinctions') do
|
29
|
-
options[:ignorecase]=true
|
30
|
-
end
|
31
|
-
|
32
|
-
opts.on('-B', '--before-context NUM', Integer, 'print NUM lines of leading context') do | c |
|
33
|
-
options[:precontext]=c
|
34
|
-
end
|
35
|
-
|
36
|
-
opts.on('-A','--after-context NUM',Integer,'print NUM lines of trailing context') do | c |
|
37
|
-
options[:postcontext]=c
|
38
|
-
end
|
39
|
-
opts.on('-C','--context NUM',Integer,'print NUM lines of output context') do | c |
|
40
|
-
if c>0
|
41
|
-
options[:postcontext]=c
|
42
|
-
options[:precontext]=options[:postcontext]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
opts.on('-X','--exclude FILE_PATTERN',String,
|
48
|
-
'Exclude files that match FILE_PATTERN (as ruby regexp). Case insensitive') do |p|
|
49
|
-
options[:file_to_exclude].push(/#{Regexp.escape(p)}/i);
|
50
|
-
end
|
51
|
-
|
52
|
-
opts.on('-w','--wild','Do a wildcharacter search. * means "every char". Imply -i') do
|
53
|
-
options[:be_wild] = true
|
54
|
-
options[:ignorecase]=true
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
opts.on('-h','--redis-server pass@SERVER:port', String,
|
59
|
-
'Specify the alternate redis server to use')do |server|
|
60
|
-
myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
|
61
|
-
options[:redis_host]=myoptions[:redis_host]
|
62
|
-
options[:redis_port]=myoptions[:redis_port]
|
63
|
-
options[:redis_password]=myoptions[:redis_password]
|
64
|
-
|
65
|
-
if options[:redis_password]
|
66
|
-
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
|
67
|
-
else
|
68
|
-
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
opts.on( '-h', '--help', 'Display this screen' ) do
|
75
|
-
puts opts
|
76
|
-
puts "EXAMPLES:"
|
77
|
-
puts "czsearch ciao Koros"
|
78
|
-
puts " Will search Koros OR ciao"
|
79
|
-
puts "czsearch -i gnu"
|
80
|
-
puts " Will match also GNU and Gnu"
|
81
|
-
puts "czsearch -X .orig -X .bak -X .java html:select"
|
82
|
-
puts " Will skip java and backup file"
|
83
|
-
puts "czsearch -w 'public*class School'"
|
84
|
-
puts " Will seach for a java class called School ignoring characters between public and class."
|
85
|
-
exit
|
86
|
-
end
|
87
|
-
end
|
88
|
-
optparse.parse!
|
89
|
-
|
90
|
-
ARGV.each do | s |
|
91
|
-
#puts "Code Zauker Searching for #{s}"
|
92
|
-
util=CodeZauker::Util.new()
|
93
|
-
redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
|
94
|
-
fs=CodeZauker::FileScanner.new(redisConnection)
|
95
|
-
|
96
|
-
if options[:be_wild]==true
|
97
|
-
puts "Wild MODE"
|
98
|
-
cli=CodeZauker::CliUtil.new()
|
99
|
-
r=cli.doWildSearch(s,fs)
|
100
|
-
files= r[:files]
|
101
|
-
pattern=r[:regexp]
|
102
|
-
else
|
103
|
-
# It uses always isearch
|
104
|
-
# and delegates to the grep subsystem to find it out
|
105
|
-
files=fs.isearch(s)
|
106
|
-
if options[:ignorecase]==false
|
107
|
-
pattern=/#{Regexp.escape(s)}/
|
108
|
-
else
|
109
|
-
pattern=/#{Regexp.escape(s)}/i
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
files.each do |f|
|
114
|
-
to_exclude=false
|
115
|
-
if options[:file_to_exclude].length >0
|
116
|
-
# Will match?
|
117
|
-
to_exclude=false
|
118
|
-
options[:file_to_exclude].each do |pattern|
|
119
|
-
#puts "\n\t#{f} =~ #{pattern}"
|
120
|
-
if (f =~ pattern )
|
121
|
-
to_exclude=true
|
122
|
-
#puts "Excluded #{f}"
|
123
|
-
break
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
# Does it exist?
|
129
|
-
if !to_exclude && !File.exists?(f)
|
130
|
-
#puts "WARN: Not FOUND #{f}"
|
131
|
-
to_exclude=true
|
132
|
-
end
|
133
|
-
|
134
|
-
if !to_exclude
|
135
|
-
begin
|
136
|
-
if util.is_pdf?(f)==false
|
137
|
-
lines=grep(f,pattern, pre_context=options[:precontext], post_context=options[:postcontext]);
|
138
|
-
lines.each do |l |
|
139
|
-
puts "#{f}:#{l}"
|
140
|
-
end
|
141
|
-
else
|
142
|
-
puts "#{f} Pdf matches"
|
143
|
-
#Using pdf/reader we can do a search here but we must store the stuff
|
144
|
-
# in a temp file
|
145
|
-
|
146
|
-
tempfile =Tempfile.new("czsearch_pdf.tmp")
|
147
|
-
tempfile.write(util.get_lines(f).join("\n"))
|
148
|
-
tempfile.close
|
149
|
-
#puts "Temp PDF into #{tempfile.path}"
|
150
|
-
lines=grep(tempfile.path,pattern, pre_context=options[:precontext], post_context=options[:postcontext]);
|
151
|
-
lines.each do |l |
|
152
|
-
puts "#{f}:#{l}"
|
153
|
-
end
|
154
|
-
tempfile.unlink
|
155
|
-
end
|
156
|
-
rescue ArgumentError => ioe
|
157
|
-
puts "FATAL ArgumentError on #{f}"
|
158
|
-
raise ioe
|
159
|
-
end
|
160
|
-
else
|
161
|
-
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
1
|
+
#!/bin/bash
|
2
|
+
czlist --redis-server 10.0.2.2:6380 $* | xargs grep $*
|
data/bin/mczindexer
CHANGED
File without changes
|
data/bin/report.rb
CHANGED
data/bin/startRedis
CHANGED
File without changes
|
data/bin/webgui
CHANGED
File without changes
|
data/code_zauker.gemspec
CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_development_dependency "yard", "~>0.7"
|
24
24
|
s.add_development_dependency "rubyzip", "~> 0.9"
|
25
25
|
|
26
|
-
s.add_runtime_dependency "hiredis", "~> 0.3"
|
26
|
+
## s.add_runtime_dependency "hiredis", "~> 0.3"
|
27
27
|
s.add_runtime_dependency "redis", "~> 2.2"
|
28
28
|
s.add_runtime_dependency "pdf-reader", "~> 1.0.0"
|
29
29
|
s.add_runtime_dependency "sinatra", "~> 1.3"
|
data/devel.org
CHANGED
@@ -1,3 +1,53 @@
|
|
1
|
+
* Basic commands
|
2
|
+
Make sure to uninstall code zauker from your gems (gem uninstall code_zauker)
|
3
|
+
before you start developing
|
4
|
+
** Environment setup (windows)
|
5
|
+
|
6
|
+
Ensure you have Dev kit too
|
7
|
+
http://rubyinstaller.org/downloads
|
8
|
+
https://github.com/oneclick/rubyinstaller/wiki/Development-Kit
|
9
|
+
It is for hiredis: hiredis is not mandatory, but suggested
|
10
|
+
|
11
|
+
|
12
|
+
#+begin_src sh
|
13
|
+
gem install bundler
|
14
|
+
# Dev kit installation...
|
15
|
+
#ruby /c/rubyinstallkit/dk.rb init
|
16
|
+
#ruby /c/rubyinstallkit/dk.rb install
|
17
|
+
bundle install
|
18
|
+
rake test
|
19
|
+
# Ensure dev code is reachable
|
20
|
+
export RUBYLIB=k:/code/code_zauker/lib
|
21
|
+
#+end_src
|
22
|
+
|
23
|
+
|
24
|
+
** To Run tests
|
25
|
+
#+begin_src sh
|
26
|
+
rake test
|
27
|
+
#+end_src
|
28
|
+
|
29
|
+
** To release a new version to rubygem
|
30
|
+
#+begin_src sh
|
31
|
+
rake release
|
32
|
+
#+end_src
|
33
|
+
|
34
|
+
** Dependency management
|
35
|
+
Done with ruby "bundle"; you should periodically check dependencies
|
36
|
+
with "bundle update" to be sure you have the latest bug fixes of the dependent libs
|
37
|
+
|
38
|
+
* Notable facts
|
39
|
+
** DB Size tradeoff
|
40
|
+
If the gram size is greater than 3, the database becomes larger, because of fewer collisions.
|
41
|
+
czlist works better with 4-grams than with 3-grams (a lot fewer false positives)
|
42
|
+
but the size can be 50% bigger
|
43
|
+
|
44
|
+
2-gram size rocks a lot, because of a very small db, but false positives are a nightmare.
|
45
|
+
czlist gives 2188 files with a "for", but grep reports only 383 of them (less than 18% success)
|
46
|
+
|
47
|
+
|
48
|
+
Emacs-lisp files sport a very large number of trigrams
|
49
|
+
|
50
|
+
|
1
51
|
* Future/Study
|
2
52
|
To fulfill Google code options:
|
3
53
|
** Google code input
|
data/doc/CodeZauker.html
CHANGED
File without changes
|
data/doc/CodeZauker/CliUtil.html
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/doc/CodeZauker/Util.html
CHANGED
File without changes
|
data/doc/Grep.html
CHANGED
File without changes
|
data/doc/_index.html
CHANGED
File without changes
|
data/doc/class_list.html
CHANGED
File without changes
|
data/doc/css/common.css
CHANGED
File without changes
|
data/doc/css/full_list.css
CHANGED
File without changes
|
data/doc/css/style.css
CHANGED
File without changes
|
data/doc/file_list.html
CHANGED
File without changes
|
data/doc/frames.html
CHANGED
File without changes
|
data/doc/index.html
CHANGED
File without changes
|
data/doc/js/app.js
CHANGED
File without changes
|
data/doc/js/full_list.js
CHANGED
File without changes
|
data/doc/js/jquery.js
CHANGED
File without changes
|
data/doc/method_list.html
CHANGED
File without changes
|
File without changes
|
data/etc/redis-win.conf
CHANGED
@@ -22,9 +22,9 @@ pidfile C:/TEMP/codezauker_redis.pid
|
|
22
22
|
|
23
23
|
# Accept connections on the specified port, default is 6379.
|
24
24
|
# If port 0 is specified Redis will not listen on a TCP socket.
|
25
|
-
|
25
|
+
port 6379
|
26
26
|
# Another port if you run a VM like me
|
27
|
-
port 6380
|
27
|
+
#port 6380
|
28
28
|
|
29
29
|
# If you want you can bind a single interface, if the bind option is not
|
30
30
|
# specified all the interfaces will listen for incoming connections.
|
data/etc/redis.conf
CHANGED
File without changes
|
data/htdocs/CodeZauker.gif
CHANGED
File without changes
|
data/htdocs/Gioorgi.gif
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/htdocs/css/bootstrap.css
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/htdocs/js/bootstrap.js
CHANGED
File without changes
|
data/htdocs/js/bootstrap.min.js
CHANGED
File without changes
|
data/lib/code_zauker.rb
CHANGED
@@ -2,12 +2,15 @@
|
|
2
2
|
require "code_zauker/version"
|
3
3
|
require "code_zauker/constants"
|
4
4
|
require 'code_zauker/grep'
|
5
|
-
require 'redis/connection/hiredis'
|
5
|
+
# require 'redis/connection/hiredis'
|
6
6
|
require 'redis'
|
7
7
|
require 'set'
|
8
8
|
require 'pdf/reader'
|
9
9
|
require 'date'
|
10
10
|
|
11
|
+
#require 'digest'
|
12
|
+
require 'digest/md5'
|
13
|
+
|
11
14
|
# This module implements a simple reverse indexer
|
12
15
|
# based on Redis
|
13
16
|
# The idea is ispired by http://swtch.com/~rsc/regexp/regexp4.html
|
@@ -199,9 +202,9 @@ module CodeZauker
|
|
199
202
|
end
|
200
203
|
end
|
201
204
|
end
|
202
|
-
if showlog
|
203
|
-
|
204
|
-
end
|
205
|
+
# if showlog
|
206
|
+
# puts " <Pushed #{s.length}..."
|
207
|
+
# end
|
205
208
|
puts "WARN: Some invalid UTF-8 char on #{filename} Case insensitive search will be compromised" if case_insensitive_trigram_failed
|
206
209
|
end
|
207
210
|
|
@@ -226,7 +229,7 @@ module CodeZauker
|
|
226
229
|
private :pushTrigramsSetRecoverable
|
227
230
|
|
228
231
|
|
229
|
-
def load(filename
|
232
|
+
def load(filename)
|
230
233
|
# Define my redis id...
|
231
234
|
# Already exists?...
|
232
235
|
fid=@redis.get "fscan:id:#{filename}"
|
@@ -237,10 +240,18 @@ module CodeZauker
|
|
237
240
|
@redis.set "fscan:id:#{filename}", fid
|
238
241
|
@redis.set "fscan:id2filename:#{fid}",filename
|
239
242
|
else
|
240
|
-
|
241
|
-
|
243
|
+
# ADD MD5 Checksum
|
244
|
+
#Digest::MD5.hexdigest("aaa")
|
245
|
+
fileDigest = Digest::MD5.hexdigest(File.read(filename))
|
246
|
+
storedDigest=@redis.get("cz:md5:#{filename}")
|
247
|
+
if(fileDigest!=storedDigest)
|
248
|
+
puts "#{filename} CHANGED...MD5: #{fileDigest} REINDEXING..."
|
249
|
+
self.remove([filename])
|
250
|
+
else
|
251
|
+
## puts "#{filename} id:#{fid} MD% UP TO DATE and NOT RELOADED"
|
242
252
|
return nil
|
243
253
|
end
|
254
|
+
|
244
255
|
end
|
245
256
|
# fid is the set key!...
|
246
257
|
trigramScanned=0
|
@@ -256,7 +267,7 @@ module CodeZauker
|
|
256
267
|
|
257
268
|
lines.each do |lineNotUTF8|
|
258
269
|
l= util.ensureUTF8(lineNotUTF8)
|
259
|
-
# Split each line into
|
270
|
+
# Split each line into GRAM_SIZE-char chunks, and store in a redis set
|
260
271
|
i=0
|
261
272
|
for istart in 0...(l.length-GRAM_SIZE)
|
262
273
|
trigram = l[istart, GRAM_SIZE]
|
@@ -271,7 +282,7 @@ module CodeZauker
|
|
271
282
|
s=Set.new()
|
272
283
|
end
|
273
284
|
trigramScanned += 1
|
274
|
-
#puts "#{istart}
|
285
|
+
#puts "#{istart} Gram fscan:#{trigram}/ FileId: #{fid}"
|
275
286
|
end
|
276
287
|
end
|
277
288
|
|
@@ -287,8 +298,13 @@ module CodeZauker
|
|
287
298
|
@redis.sadd "fscan:processedFiles", "#{filename}"
|
288
299
|
trigramRatio=( (trigramsOnFile*1.0) / trigramScanned )* 100.0
|
289
300
|
if trigramRatio < 10 or trigramRatio >75
|
290
|
-
puts "#{filename}\n\tRatio:#{trigramRatio.round}% Unique
|
301
|
+
puts "#{filename}\n\tRatio:#{trigramRatio.round}% Unique #{GRAM_SIZE}-grams:#{trigramsOnFile} Total Scanned: #{trigramScanned} ?Binary" if trigramRatio >90 and trigramsOnFile>70
|
291
302
|
end
|
303
|
+
|
304
|
+
# Register digest...do at last for better security
|
305
|
+
fileDigest = Digest::MD5.hexdigest(File.read(filename))
|
306
|
+
@redis.set("cz:md5:#{filename}",fileDigest)
|
307
|
+
|
292
308
|
return nil
|
293
309
|
end
|
294
310
|
|
@@ -344,11 +360,11 @@ module CodeZauker
|
|
344
360
|
# YourAppManager
|
345
361
|
def wsearch(term)
|
346
362
|
# Split stuff
|
347
|
-
puts "Wild Search request:#{term}"
|
363
|
+
#puts "Wild Search request:#{term}"
|
348
364
|
m=term.split("*")
|
349
365
|
if m.length>0
|
350
366
|
trigramInAnd=Set.new()
|
351
|
-
puts "*= Found:#{m.length}"
|
367
|
+
#puts "*= Found:#{m.length}"
|
352
368
|
m.each do | wtc |
|
353
369
|
wt=wtc.downcase()
|
354
370
|
#puts "Splitting #{wt}"
|
@@ -386,7 +402,7 @@ module CodeZauker
|
|
386
402
|
#puts "Reindexing... #{fileList.length} files..."
|
387
403
|
fileList.each do |current_file |
|
388
404
|
self.remove([current_file])
|
389
|
-
self.load(current_file
|
405
|
+
self.load(current_file)
|
390
406
|
end
|
391
407
|
end
|
392
408
|
|
data/lib/code_zauker/cli.rb
CHANGED
@@ -79,8 +79,8 @@ module CodeZauker
|
|
79
79
|
min=trigramsOnFile if trigramsOnFile <min and trigramsOnFile>0
|
80
80
|
end
|
81
81
|
av=sum/count
|
82
|
-
puts "Average
|
83
|
-
tagCharSize=max/
|
82
|
+
puts "Average -grams per file:#{av} Min: #{min} Max: #{max}"
|
83
|
+
tagCharSize=max/20
|
84
84
|
#tagCharSize=max/10 if tagCharSize>80
|
85
85
|
puts "Graphic summary... +=#{tagCharSize}"
|
86
86
|
ids.each do | fid |
|
@@ -88,7 +88,7 @@ module CodeZauker
|
|
88
88
|
if trigramsOnFile>= (tagCharSize*3)
|
89
89
|
fname=redis.get("fscan:id2filename:#{fid}")
|
90
90
|
bar="+"*(trigramsOnFile/tagCharSize)
|
91
|
-
puts "#{bar} #{fname}"
|
91
|
+
puts "#{trigramsOnFile} #{bar} #{fname}"
|
92
92
|
end
|
93
93
|
end
|
94
94
|
|
data/lib/code_zauker/grep.rb
CHANGED
File without changes
|
data/lib/code_zauker/version.rb
CHANGED
data/lib/code_zauker/webgui.rb
CHANGED
File without changes
|
data/readme.org
CHANGED
@@ -4,6 +4,23 @@ Code Zauker is based from ideas taken by old Google Code Search and uses Redis a
|
|
4
4
|
|
5
5
|
For news and discussion: http://gioorgi.com/tag/code-zauker/
|
6
6
|
|
7
|
+
* NEWS
|
8
|
+
** And an happy coding year! 2013
|
9
|
+
Version 0.1.0 sports an auto-reindexing system, 3-gram size and a new command, czlist.
|
10
|
+
CodeZauker will store an MD5 checksum for every file and automatically reindex changed files.
|
11
|
+
There is a small API change: FileScanner>>load will no longer accept noReload because it will
|
12
|
+
automatically use md5 to understand if a reindex is needed.
|
13
|
+
Anyway, client code should rely on FileScanner>>reindex to force a reindex of a file.
|
14
|
+
This feature is still beta but works pretty well.
|
15
|
+
|
16
|
+
When a lot of reindexing is involved, performance can drop sharply, to under 2 files per sec
|
17
|
+
|
18
|
+
Version 0.1.0 also sports a new command, /czlist/, which simplifies integration with the unix tool-chain.
|
19
|
+
czlist directly accesses the Code Zauker core to show only the filenames
|
20
|
+
which could contain the search string.
|
21
|
+
czlist is ideal for IDE integration
|
22
|
+
|
23
|
+
|
7
24
|
|
8
25
|
* INSTALL
|
9
26
|
To install Code Zauker,simply issue
|
@@ -70,7 +87,7 @@ and enjoy!
|
|
70
87
|
* MS-Windows Compatibility
|
71
88
|
Grab your windows redis server at
|
72
89
|
https://github.com/dmajkic/redis/downloads
|
73
|
-
Version 0.0.9 has been
|
90
|
+
Versions 0.0.9 and 0.1.0 have been successfully tested with the Redis 2.4.5 32-bit version
|
74
91
|
You will find a
|
75
92
|
redis-win.conf example
|
76
93
|
to give you a fast-startup
|
@@ -79,6 +96,9 @@ to give you a fast-startup
|
|
79
96
|
* Release History
|
80
97
|
| Version | Date | Summary |
|
81
98
|
|---------+-------------+-------------------------------------------------------------------------------|
|
99
|
+
| 0.1.0 | | Added czlist command which supersedes czsearch. |
|
100
|
+
| | | Czindex now sports a better auto reindexing feature |
|
101
|
+
| | | Removed hiredis dependency for easier installation under ms-windows |
|
82
102
|
| 0.0.9 | 12 Oct 2012 | Removed case sensitive backend to improve space use. Er Zauker Compatibility. |
|
83
103
|
| | | Tested on MSWin |
|
84
104
|
| 0.0.8 | 04 Jun 2012 | Wildcard (*) search/better error handling of missed files/indexchecker |
|
@@ -90,9 +110,11 @@ to give you a fast-startup
|
|
90
110
|
| 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
|
91
111
|
| 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
|
92
112
|
|
93
|
-
|
94
|
-
|
113
|
+
|
95
114
|
* DEVELOPING
|
96
115
|
For developing with Code Zauker you need bundler 1.0.21 or above
|
97
|
-
See devel.org file
|
116
|
+
See devel.org file for more information
|
117
|
+
|
118
|
+
* KNOWN BUGS / LIMITATIONS
|
119
|
+
At the time of writing, indexing emacs-lisp files is a very slow task.
|
98
120
|
|
data/templates/search.erb
CHANGED
File without changes
|
data/templates/show_results.erb
CHANGED
File without changes
|
File without changes
|
data/test/fixture/foolish.txt
CHANGED
File without changes
|
data/test/fixture/kurukku.txt
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/test/fixture/wildtest.txt
CHANGED
File without changes
|
data/test/test_pdf_indexing.rb
CHANGED
File without changes
|
data/test/test_search.rb
CHANGED
@@ -23,7 +23,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
23
23
|
|
24
24
|
def test_scanner_trigram_simple
|
25
25
|
fs=CodeZauker::FileScanner.new()
|
26
|
-
fs.load("./readme.org"
|
26
|
+
fs.load("./readme.org")
|
27
27
|
fs.load("./test/fixture/kurukku.txt")
|
28
28
|
files=fs.search("kku")
|
29
29
|
assert (files[0].include?("fixture/kurukku.txt")==true)
|
@@ -66,14 +66,14 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
66
66
|
|
67
67
|
def test_very_big_file
|
68
68
|
fs=CodeZauker::FileScanner.new()
|
69
|
-
fs.load("./test/fixture/TEST_LICENSE.txt"
|
69
|
+
fs.load("./test/fixture/TEST_LICENSE.txt")
|
70
70
|
files=fs.search('"Commercial Use"')
|
71
71
|
assert files.include?("./test/fixture/TEST_LICENSE.txt")==true
|
72
72
|
end
|
73
73
|
|
74
74
|
def test_remove
|
75
75
|
fs=CodeZauker::FileScanner.new()
|
76
|
-
fs.load("./test/fixture/kurukku.txt"
|
76
|
+
fs.load("./test/fixture/kurukku.txt")
|
77
77
|
fs.remove(["./test/fixture/kurukku.txt"])
|
78
78
|
files=fs.search("\"Be hungry, be foolish\"")
|
79
79
|
assert files.length ==0,
|
@@ -86,7 +86,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
86
86
|
# require 'redis'
|
87
87
|
# redis=Redis.new
|
88
88
|
# fs=CodeZauker::FileScanner.new(redis)
|
89
|
-
# fs.load("./test/fixture/kurukku.txt"
|
89
|
+
# fs.load("./test/fixture/kurukku.txt")
|
90
90
|
# fs.removeAll()
|
91
91
|
# foundKeys=redis.keys "*"
|
92
92
|
# #puts "Keys at empty db:#{foundKeys}"
|
@@ -96,14 +96,14 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
96
96
|
# # 2012 Jan 30 New Case Insensitive Test cases
|
97
97
|
def test_case_insensitive1
|
98
98
|
fs=CodeZauker::FileScanner.new()
|
99
|
-
fs.load("./test/fixture/kurukku.txt"
|
99
|
+
fs.load("./test/fixture/kurukku.txt")
|
100
100
|
flist=fs.isearch("caseinsensitive Search TEST.")
|
101
101
|
assert flist.include?("./test/fixture/kurukku.txt"), "Case insensitive search failed. #{flist}"
|
102
102
|
end
|
103
103
|
|
104
104
|
def test_case_insensitive2
|
105
105
|
fs=CodeZauker::FileScanner.new()
|
106
|
-
fs.load("./test/fixture/kurukku.txt"
|
106
|
+
fs.load("./test/fixture/kurukku.txt")
|
107
107
|
flist=fs.isearch("caSeinsenSitive Search TEST.")
|
108
108
|
assert flist.include?("./test/fixture/kurukku.txt"), "Case insensitive search failed. #{flist}"
|
109
109
|
assert fs.search("CASeinsenSitivE").include?("./test/fixture/kurukku.txt"), "Search must be always insensitive"
|
@@ -111,7 +111,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
111
111
|
|
112
112
|
def test_case_insensitive3
|
113
113
|
fs=CodeZauker::FileScanner.new()
|
114
|
-
fs.load("./test/fixture/kurukku.txt"
|
114
|
+
fs.load("./test/fixture/kurukku.txt")
|
115
115
|
u=CodeZauker::Util.new()
|
116
116
|
(u.mixCase("CaSeinsen")).each { |t|
|
117
117
|
#puts "Checking #{t}"
|
data/test/test_wild_search.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: code_zauker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-05-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yard
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0.7'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.7'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rubyzip
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ~>
|
@@ -32,21 +37,15 @@ dependencies:
|
|
32
37
|
version: '0.9'
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: hiredis
|
38
|
-
requirement: &79399600 !ruby/object:Gem::Requirement
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
41
|
none: false
|
40
42
|
requirements:
|
41
43
|
- - ~>
|
42
44
|
- !ruby/object:Gem::Version
|
43
|
-
version: '0.
|
44
|
-
type: :runtime
|
45
|
-
prerelease: false
|
46
|
-
version_requirements: *79399600
|
45
|
+
version: '0.9'
|
47
46
|
- !ruby/object:Gem::Dependency
|
48
47
|
name: redis
|
49
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
50
49
|
none: false
|
51
50
|
requirements:
|
52
51
|
- - ~>
|
@@ -54,10 +53,15 @@ dependencies:
|
|
54
53
|
version: '2.2'
|
55
54
|
type: :runtime
|
56
55
|
prerelease: false
|
57
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.2'
|
58
62
|
- !ruby/object:Gem::Dependency
|
59
63
|
name: pdf-reader
|
60
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
61
65
|
none: false
|
62
66
|
requirements:
|
63
67
|
- - ~>
|
@@ -65,10 +69,15 @@ dependencies:
|
|
65
69
|
version: 1.0.0
|
66
70
|
type: :runtime
|
67
71
|
prerelease: false
|
68
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.0.0
|
69
78
|
- !ruby/object:Gem::Dependency
|
70
79
|
name: sinatra
|
71
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
72
81
|
none: false
|
73
82
|
requirements:
|
74
83
|
- - ~>
|
@@ -76,10 +85,15 @@ dependencies:
|
|
76
85
|
version: '1.3'
|
77
86
|
type: :runtime
|
78
87
|
prerelease: false
|
79
|
-
version_requirements:
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.3'
|
80
94
|
- !ruby/object:Gem::Dependency
|
81
95
|
name: redis_logger
|
82
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
83
97
|
none: false
|
84
98
|
requirements:
|
85
99
|
- - ~>
|
@@ -87,13 +101,19 @@ dependencies:
|
|
87
101
|
version: '0.1'
|
88
102
|
type: :runtime
|
89
103
|
prerelease: false
|
90
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.1'
|
91
110
|
description: Code Zauker is based from ideas taken by old Google Code Search and uses
|
92
111
|
Redis as a basic platform
|
93
112
|
email:
|
94
113
|
- jj@gioorgi.com
|
95
114
|
executables:
|
96
115
|
- czindexer
|
116
|
+
- czlist
|
97
117
|
- czsearch
|
98
118
|
- mczindexer
|
99
119
|
- report.rb
|
@@ -108,6 +128,7 @@ files:
|
|
108
128
|
- LICENSE.txt
|
109
129
|
- Rakefile
|
110
130
|
- bin/czindexer
|
131
|
+
- bin/czlist
|
111
132
|
- bin/czsearch
|
112
133
|
- bin/mczindexer
|
113
134
|
- bin/report.rb
|
@@ -184,7 +205,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
205
|
version: '0'
|
185
206
|
requirements: []
|
186
207
|
rubyforge_project: code_zauker
|
187
|
-
rubygems_version: 1.8.
|
208
|
+
rubygems_version: 1.8.24
|
188
209
|
signing_key:
|
189
210
|
specification_version: 3
|
190
211
|
summary: A search engine for programming languages
|