code_zauker 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +0 -0
- data/BUGS.org +6 -0
- data/Gemfile +0 -0
- data/LICENSE.txt +0 -0
- data/Rakefile +0 -0
- data/bin/czindexer +2 -2
- data/bin/czlist +122 -0
- data/bin/czsearch +2 -164
- data/bin/mczindexer +0 -0
- data/bin/report.rb +0 -1
- data/bin/startRedis +0 -0
- data/bin/webgui +0 -0
- data/code_zauker.gemspec +1 -1
- data/devel.org +50 -0
- data/doc/CodeZauker.html +0 -0
- data/doc/CodeZauker/CliUtil.html +0 -0
- data/doc/CodeZauker/FileScanner.html +0 -0
- data/doc/CodeZauker/IndexManager.html +0 -0
- data/doc/CodeZauker/Util.html +0 -0
- data/doc/Grep.html +0 -0
- data/doc/_index.html +0 -0
- data/doc/class_list.html +0 -0
- data/doc/css/common.css +0 -0
- data/doc/css/full_list.css +0 -0
- data/doc/css/style.css +0 -0
- data/doc/file_list.html +0 -0
- data/doc/frames.html +0 -0
- data/doc/index.html +0 -0
- data/doc/js/app.js +0 -0
- data/doc/js/full_list.js +0 -0
- data/doc/js/jquery.js +0 -0
- data/doc/method_list.html +0 -0
- data/doc/top-level-namespace.html +0 -0
- data/etc/redis-win.conf +2 -2
- data/etc/redis.conf +0 -0
- data/htdocs/CodeZauker.gif +0 -0
- data/htdocs/Gioorgi.gif +0 -0
- data/htdocs/css/bootstrap-responsive.css +0 -0
- data/htdocs/css/bootstrap-responsive.min.css +0 -0
- data/htdocs/css/bootstrap.css +0 -0
- data/htdocs/css/bootstrap.min.css +0 -0
- data/htdocs/img/glyphicons-halflings-white.png +0 -0
- data/htdocs/img/glyphicons-halflings.png +0 -0
- data/htdocs/js/bootstrap.js +0 -0
- data/htdocs/js/bootstrap.min.js +0 -0
- data/lib/code_zauker.rb +29 -13
- data/lib/code_zauker/cli.rb +3 -3
- data/lib/code_zauker/constants.rb +2 -2
- data/lib/code_zauker/grep.rb +0 -0
- data/lib/code_zauker/version.rb +1 -1
- data/lib/code_zauker/webgui.rb +0 -0
- data/readme.org +26 -4
- data/templates/search.erb +0 -0
- data/templates/show_results.erb +0 -0
- data/test/fixture/TEST_LICENSE.txt +0 -0
- data/test/fixture/foolish.txt +0 -0
- data/test/fixture/kurukku.txt +0 -0
- data/test/fixture/simple_test.pdf +0 -0
- data/test/fixture/testArchive.zip +0 -0
- data/test/fixture/wildtest.txt +0 -0
- data/test/test_pdf_indexing.rb +0 -0
- data/test/test_search.rb +7 -7
- data/test/test_wild_search.rb +0 -0
- metadata +43 -22
data/.gitignore
CHANGED
File without changes
|
data/BUGS.org
CHANGED
@@ -2,3 +2,9 @@
|
|
2
2
|
* Bug 001 :wontfix_soon:
|
3
3
|
Indexing a 700Kb gem takes too much time; it seems to be looping
|
4
4
|
Avoid indexing gem file for the meantime.
|
5
|
+
* Bug 002 :limitation:
|
6
|
+
Reindexing does not work very well. Code Zauker will not automatically detect a file has changed.
|
7
|
+
Implement MD5 checksum support for fast reindexing of changed files
|
8
|
+
|
9
|
+
* Bug 003 :low:
|
10
|
+
Avoid keys() command because it is deprecated for normal usage scenario
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE.txt
CHANGED
File without changes
|
data/Rakefile
CHANGED
File without changes
|
data/bin/czindexer
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# find test/fixture/ -type f | xargs -P 5 -n 10 ./bin/czindexer
|
4
4
|
# will fire 5 czindexer each with 10 files to process...
|
5
5
|
require 'code_zauker/cli'
|
6
|
-
require 'redis/connection/hiredis'
|
6
|
+
#require 'redis/connection/hiredis'
|
7
7
|
require 'redis'
|
8
8
|
require 'optparse'
|
9
9
|
options={}
|
@@ -86,7 +86,7 @@ def processElement(l,fs,options)
|
|
86
86
|
if options[:reindex] == true
|
87
87
|
fs.reindex([l])
|
88
88
|
else
|
89
|
-
fs.load(l
|
89
|
+
fs.load(l)
|
90
90
|
end
|
91
91
|
timeTaken=Time.now-startTime
|
92
92
|
$PROCESSED_FILES+=1
|
data/bin/czlist
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#== czlist list only the files which can contain the required string
|
3
|
+
# should be combined with "xargs grep/egrep"
|
4
|
+
# Ideal for ide integration
|
5
|
+
# Simpler than czsearch
|
6
|
+
$VERBOSE=nil
|
7
|
+
require 'code_zauker'
|
8
|
+
require 'code_zauker/cli'
|
9
|
+
#require 'redis/connection/hiredis'
|
10
|
+
require 'redis'
|
11
|
+
#include Grep
|
12
|
+
|
13
|
+
require 'optparse'
|
14
|
+
options={}
|
15
|
+
optparse= OptionParser.new do |opts|
|
16
|
+
opts.banner="Usage: czlist [options] [term1] [term2]..."
|
17
|
+
options[:extensions_to_ignore]=[]
|
18
|
+
options[:file_to_exclude]=[]
|
19
|
+
options[:redis_host]="127.0.0.1"
|
20
|
+
options[:redis_port]=6379
|
21
|
+
options[:redis_password]=nil
|
22
|
+
options[:be_wild]=true
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
opts.on('-X','--exclude FILE_PATTERN',String,
|
27
|
+
'Exclude files that match FILE_PATTERN (as ruby regexp). Case insensitive') do |p|
|
28
|
+
options[:file_to_exclude].push(/#{Regexp.escape(p)}/i);
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.on('-w','--wild','Do a wildcharacter search. * means "every char". True by default') do
|
32
|
+
options[:be_wild] = true
|
33
|
+
options[:ignorecase]=true
|
34
|
+
end
|
35
|
+
|
36
|
+
opts.on('-e','--exact','Disable wild search Userful if you need to search * or exact matches ') do
|
37
|
+
options[:be_wild] = false
|
38
|
+
options[:ignorecase]=true
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
opts.on('--redis-server pass@SERVER:port', String,
|
43
|
+
'Specify the alternate redis server to use')do |server|
|
44
|
+
myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
|
45
|
+
options[:redis_host]=myoptions[:redis_host]
|
46
|
+
options[:redis_port]=myoptions[:redis_port]
|
47
|
+
options[:redis_password]=myoptions[:redis_password]
|
48
|
+
|
49
|
+
if options[:redis_password]
|
50
|
+
#puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
|
51
|
+
else
|
52
|
+
#puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
opts.on( '-h', '--help', 'Display this screen' ) do
|
59
|
+
puts opts
|
60
|
+
puts "EXAMPLES:"
|
61
|
+
puts "czlist for"
|
62
|
+
puts " Will search for loops and return the file smatching it"
|
63
|
+
puts "czlist -w 'public*class School'"
|
64
|
+
puts " Will seach for a java class called School ignoring characters between public and class."
|
65
|
+
puts "czlist for | xargs grep for"
|
66
|
+
puts " will be quite the same of czsearch but faster."
|
67
|
+
puts "Search is always case insensitive and wild by default"
|
68
|
+
exit
|
69
|
+
end
|
70
|
+
end
|
71
|
+
optparse.parse!
|
72
|
+
|
73
|
+
ARGV.each do | s |
|
74
|
+
#puts "Code Zauker Searching for #{s}"
|
75
|
+
util=CodeZauker::Util.new()
|
76
|
+
redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
|
77
|
+
fs=CodeZauker::FileScanner.new(redisConnection)
|
78
|
+
|
79
|
+
if options[:be_wild]==true
|
80
|
+
cli=CodeZauker::CliUtil.new()
|
81
|
+
r=cli.doWildSearch(s,fs)
|
82
|
+
files= r[:files]
|
83
|
+
pattern=r[:regexp]
|
84
|
+
else
|
85
|
+
# It uses always isearch
|
86
|
+
# and delegates to the grep subsystem to find it out
|
87
|
+
files=fs.isearch(s)
|
88
|
+
end
|
89
|
+
|
90
|
+
files.each do |f|
|
91
|
+
to_exclude=false
|
92
|
+
if options[:file_to_exclude].length >0
|
93
|
+
# Will match?
|
94
|
+
to_exclude=false
|
95
|
+
options[:file_to_exclude].each do |pattern|
|
96
|
+
#puts "\n\t#{f} =~ #{pattern}"
|
97
|
+
if (f =~ pattern )
|
98
|
+
to_exclude=true
|
99
|
+
#puts "Excluded #{f}"
|
100
|
+
break
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# Does it exist?
|
106
|
+
if !to_exclude && !File.exists?(f)
|
107
|
+
#puts "WARN: Not FOUND #{f}"
|
108
|
+
to_exclude=true
|
109
|
+
end
|
110
|
+
|
111
|
+
if !to_exclude
|
112
|
+
begin
|
113
|
+
puts "#{f}"
|
114
|
+
rescue ArgumentError => ioe
|
115
|
+
puts "FATAL ArgumentError on #{f}"
|
116
|
+
raise ioe
|
117
|
+
end
|
118
|
+
else
|
119
|
+
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
data/bin/czsearch
CHANGED
@@ -1,164 +1,2 @@
|
|
1
|
-
#!/
|
2
|
-
|
3
|
-
# Send something like -W0 to ruby, for a cleaner output
|
4
|
-
$VERBOSE=nil
|
5
|
-
require 'code_zauker'
|
6
|
-
require 'code_zauker/grep'
|
7
|
-
require 'code_zauker/cli'
|
8
|
-
require 'redis/connection/hiredis'
|
9
|
-
require 'redis'
|
10
|
-
require 'tempfile'
|
11
|
-
require 'pdf/reader'
|
12
|
-
include Grep
|
13
|
-
|
14
|
-
require 'optparse'
|
15
|
-
options={}
|
16
|
-
optparse= OptionParser.new do |opts|
|
17
|
-
opts.banner="Usage: czsearch [options] [term1] [term2]..."
|
18
|
-
options[:ignorecase]=false
|
19
|
-
options[:precontext]=0
|
20
|
-
options[:postcontext]=0
|
21
|
-
options[:extensions_to_ignore]=[]
|
22
|
-
options[:file_to_exclude]=[]
|
23
|
-
options[:redis_host]="127.0.0.1"
|
24
|
-
options[:redis_port]=6379
|
25
|
-
options[:redis_password]=nil
|
26
|
-
options[:be_wild]=false
|
27
|
-
|
28
|
-
opts.on('-i', '--ignore-case','ignore case distinctions') do
|
29
|
-
options[:ignorecase]=true
|
30
|
-
end
|
31
|
-
|
32
|
-
opts.on('-B', '--before-context NUM', Integer, 'print NUM lines of leading context') do | c |
|
33
|
-
options[:precontext]=c
|
34
|
-
end
|
35
|
-
|
36
|
-
opts.on('-A','--after-context NUM',Integer,'print NUM lines of trailing context') do | c |
|
37
|
-
options[:postcontext]=c
|
38
|
-
end
|
39
|
-
opts.on('-C','--context NUM',Integer,'print NUM lines of output context') do | c |
|
40
|
-
if c>0
|
41
|
-
options[:postcontext]=c
|
42
|
-
options[:precontext]=options[:postcontext]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
opts.on('-X','--exclude FILE_PATTERN',String,
|
48
|
-
'Exclude files that match FILE_PATTERN (as ruby regexp). Case insensitive') do |p|
|
49
|
-
options[:file_to_exclude].push(/#{Regexp.escape(p)}/i);
|
50
|
-
end
|
51
|
-
|
52
|
-
opts.on('-w','--wild','Do a wildcharacter search. * means "every char". Imply -i') do
|
53
|
-
options[:be_wild] = true
|
54
|
-
options[:ignorecase]=true
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
opts.on('-h','--redis-server pass@SERVER:port', String,
|
59
|
-
'Specify the alternate redis server to use')do |server|
|
60
|
-
myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
|
61
|
-
options[:redis_host]=myoptions[:redis_host]
|
62
|
-
options[:redis_port]=myoptions[:redis_port]
|
63
|
-
options[:redis_password]=myoptions[:redis_password]
|
64
|
-
|
65
|
-
if options[:redis_password]
|
66
|
-
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
|
67
|
-
else
|
68
|
-
puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
opts.on( '-h', '--help', 'Display this screen' ) do
|
75
|
-
puts opts
|
76
|
-
puts "EXAMPLES:"
|
77
|
-
puts "czsearch ciao Koros"
|
78
|
-
puts " Will search Koros OR ciao"
|
79
|
-
puts "czsearch -i gnu"
|
80
|
-
puts " Will match also GNU and Gnu"
|
81
|
-
puts "czsearch -X .orig -X .bak -X .java html:select"
|
82
|
-
puts " Will skip java and backup file"
|
83
|
-
puts "czsearch -w 'public*class School'"
|
84
|
-
puts " Will seach for a java class called School ignoring characters between public and class."
|
85
|
-
exit
|
86
|
-
end
|
87
|
-
end
|
88
|
-
optparse.parse!
|
89
|
-
|
90
|
-
ARGV.each do | s |
|
91
|
-
#puts "Code Zauker Searching for #{s}"
|
92
|
-
util=CodeZauker::Util.new()
|
93
|
-
redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
|
94
|
-
fs=CodeZauker::FileScanner.new(redisConnection)
|
95
|
-
|
96
|
-
if options[:be_wild]==true
|
97
|
-
puts "Wild MODE"
|
98
|
-
cli=CodeZauker::CliUtil.new()
|
99
|
-
r=cli.doWildSearch(s,fs)
|
100
|
-
files= r[:files]
|
101
|
-
pattern=r[:regexp]
|
102
|
-
else
|
103
|
-
# It uses always isearch
|
104
|
-
# and delegates to the grep subsystem to find it out
|
105
|
-
files=fs.isearch(s)
|
106
|
-
if options[:ignorecase]==false
|
107
|
-
pattern=/#{Regexp.escape(s)}/
|
108
|
-
else
|
109
|
-
pattern=/#{Regexp.escape(s)}/i
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
files.each do |f|
|
114
|
-
to_exclude=false
|
115
|
-
if options[:file_to_exclude].length >0
|
116
|
-
# Will match?
|
117
|
-
to_exclude=false
|
118
|
-
options[:file_to_exclude].each do |pattern|
|
119
|
-
#puts "\n\t#{f} =~ #{pattern}"
|
120
|
-
if (f =~ pattern )
|
121
|
-
to_exclude=true
|
122
|
-
#puts "Excluded #{f}"
|
123
|
-
break
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
# Does it exist?
|
129
|
-
if !to_exclude && !File.exists?(f)
|
130
|
-
#puts "WARN: Not FOUND #{f}"
|
131
|
-
to_exclude=true
|
132
|
-
end
|
133
|
-
|
134
|
-
if !to_exclude
|
135
|
-
begin
|
136
|
-
if util.is_pdf?(f)==false
|
137
|
-
lines=grep(f,pattern, pre_context=options[:precontext], post_context=options[:postcontext]);
|
138
|
-
lines.each do |l |
|
139
|
-
puts "#{f}:#{l}"
|
140
|
-
end
|
141
|
-
else
|
142
|
-
puts "#{f} Pdf matches"
|
143
|
-
#Using pdf/reader we can do a search here but we must store the stuff
|
144
|
-
# in a temp file
|
145
|
-
|
146
|
-
tempfile =Tempfile.new("czsearch_pdf.tmp")
|
147
|
-
tempfile.write(util.get_lines(f).join("\n"))
|
148
|
-
tempfile.close
|
149
|
-
#puts "Temp PDF into #{tempfile.path}"
|
150
|
-
lines=grep(tempfile.path,pattern, pre_context=options[:precontext], post_context=options[:postcontext]);
|
151
|
-
lines.each do |l |
|
152
|
-
puts "#{f}:#{l}"
|
153
|
-
end
|
154
|
-
tempfile.unlink
|
155
|
-
end
|
156
|
-
rescue ArgumentError => ioe
|
157
|
-
puts "FATAL ArgumentError on #{f}"
|
158
|
-
raise ioe
|
159
|
-
end
|
160
|
-
else
|
161
|
-
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
1
|
+
#!/bin/bash
|
2
|
+
czlist --redis-server 10.0.2.2:6380 $* | xargs grep $*
|
data/bin/mczindexer
CHANGED
File without changes
|
data/bin/report.rb
CHANGED
data/bin/startRedis
CHANGED
File without changes
|
data/bin/webgui
CHANGED
File without changes
|
data/code_zauker.gemspec
CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_development_dependency "yard", "~>0.7"
|
24
24
|
s.add_development_dependency "rubyzip", "~> 0.9"
|
25
25
|
|
26
|
-
s.add_runtime_dependency "hiredis", "~> 0.3"
|
26
|
+
## s.add_runtime_dependency "hiredis", "~> 0.3"
|
27
27
|
s.add_runtime_dependency "redis", "~> 2.2"
|
28
28
|
s.add_runtime_dependency "pdf-reader", "~> 1.0.0"
|
29
29
|
s.add_runtime_dependency "sinatra", "~> 1.3"
|
data/devel.org
CHANGED
@@ -1,3 +1,53 @@
|
|
1
|
+
* Basic commands
|
2
|
+
Make sure to uninstall code zauker from your gems (gem uninstall code_zauker)
|
3
|
+
before you start developing
|
4
|
+
** Environment setup (windows)
|
5
|
+
|
6
|
+
Ensure you have Dev kit too
|
7
|
+
http://rubyinstaller.org/downloads
|
8
|
+
https://github.com/oneclick/rubyinstaller/wiki/Development-Kit
|
9
|
+
It is for hiredis: hiredis is not mandatory, but suggested
|
10
|
+
|
11
|
+
|
12
|
+
#+begin_src sh
|
13
|
+
gem install bundler
|
14
|
+
# Dev kit installation...
|
15
|
+
#ruby /c/rubyinstallkit/dk.rb init
|
16
|
+
#ruby /c/rubyinstallkit/dk.rb install
|
17
|
+
bundle install
|
18
|
+
rake test
|
19
|
+
# Ensure dev code is reachable
|
20
|
+
export RUBYLIB=k:/code/code_zauker/lib
|
21
|
+
#+end_src
|
22
|
+
|
23
|
+
|
24
|
+
** To Run tests
|
25
|
+
#+begin_src sh
|
26
|
+
rake test
|
27
|
+
#+end_src
|
28
|
+
|
29
|
+
** To release a new version to rubygem
|
30
|
+
#+begin_src sh
|
31
|
+
rake release
|
32
|
+
#+end_src
|
33
|
+
|
34
|
+
** Dependency management
|
35
|
+
Done with ruby "bundle"; you should periodically check dependencies
|
36
|
+
with "bundle update" to be sure you have the latest bug fixes of the dependent libs
|
37
|
+
|
38
|
+
* Notable facts
|
39
|
+
** DB Size tradeoff
|
40
|
+
If the gram size is greater than 3, the database becomes larger, because of fewer collisions.
|
41
|
+
czlist works better with 4-grams than with 3-grams (a lot fewer false positives)
|
42
|
+
but the size can be 50% bigger
|
43
|
+
|
44
|
+
2-gram size rocks a lot, because of a very small db, but false positives are a nightmare.
|
45
|
+
czlist gives 2188 files with a "for", but grep reports only 383 of them (less than 18% success)
|
46
|
+
|
47
|
+
|
48
|
+
Emacs-lisp files sport a very large number of trigrams
|
49
|
+
|
50
|
+
|
1
51
|
* Future/Study
|
2
52
|
To fulfill Google code options:
|
3
53
|
** Google code input
|
data/doc/CodeZauker.html
CHANGED
File without changes
|
data/doc/CodeZauker/CliUtil.html
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/doc/CodeZauker/Util.html
CHANGED
File without changes
|
data/doc/Grep.html
CHANGED
File without changes
|
data/doc/_index.html
CHANGED
File without changes
|
data/doc/class_list.html
CHANGED
File without changes
|
data/doc/css/common.css
CHANGED
File without changes
|
data/doc/css/full_list.css
CHANGED
File without changes
|
data/doc/css/style.css
CHANGED
File without changes
|
data/doc/file_list.html
CHANGED
File without changes
|
data/doc/frames.html
CHANGED
File without changes
|
data/doc/index.html
CHANGED
File without changes
|
data/doc/js/app.js
CHANGED
File without changes
|
data/doc/js/full_list.js
CHANGED
File without changes
|
data/doc/js/jquery.js
CHANGED
File without changes
|
data/doc/method_list.html
CHANGED
File without changes
|
File without changes
|
data/etc/redis-win.conf
CHANGED
@@ -22,9 +22,9 @@ pidfile C:/TEMP/codezauker_redis.pid
|
|
22
22
|
|
23
23
|
# Accept connections on the specified port, default is 6379.
|
24
24
|
# If port 0 is specified Redis will not listen on a TCP socket.
|
25
|
-
|
25
|
+
port 6379
|
26
26
|
# Another port if you run a VM like me
|
27
|
-
port 6380
|
27
|
+
#port 6380
|
28
28
|
|
29
29
|
# If you want you can bind a single interface, if the bind option is not
|
30
30
|
# specified all the interfaces will listen for incoming connections.
|
data/etc/redis.conf
CHANGED
File without changes
|
data/htdocs/CodeZauker.gif
CHANGED
File without changes
|
data/htdocs/Gioorgi.gif
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/htdocs/css/bootstrap.css
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/htdocs/js/bootstrap.js
CHANGED
File without changes
|
data/htdocs/js/bootstrap.min.js
CHANGED
File without changes
|
data/lib/code_zauker.rb
CHANGED
@@ -2,12 +2,15 @@
|
|
2
2
|
require "code_zauker/version"
|
3
3
|
require "code_zauker/constants"
|
4
4
|
require 'code_zauker/grep'
|
5
|
-
require 'redis/connection/hiredis'
|
5
|
+
# require 'redis/connection/hiredis'
|
6
6
|
require 'redis'
|
7
7
|
require 'set'
|
8
8
|
require 'pdf/reader'
|
9
9
|
require 'date'
|
10
10
|
|
11
|
+
#require 'digest'
|
12
|
+
require 'digest/md5'
|
13
|
+
|
11
14
|
# This module implements a simple reverse indexer
|
12
15
|
# based on Redis
|
13
16
|
# The idea is ispired by http://swtch.com/~rsc/regexp/regexp4.html
|
@@ -199,9 +202,9 @@ module CodeZauker
|
|
199
202
|
end
|
200
203
|
end
|
201
204
|
end
|
202
|
-
if showlog
|
203
|
-
|
204
|
-
end
|
205
|
+
# if showlog
|
206
|
+
# puts " <Pushed #{s.length}..."
|
207
|
+
# end
|
205
208
|
puts "WARN: Some invalid UTF-8 char on #{filename} Case insensitive search will be compromised" if case_insensitive_trigram_failed
|
206
209
|
end
|
207
210
|
|
@@ -226,7 +229,7 @@ module CodeZauker
|
|
226
229
|
private :pushTrigramsSetRecoverable
|
227
230
|
|
228
231
|
|
229
|
-
def load(filename
|
232
|
+
def load(filename)
|
230
233
|
# Define my redis id...
|
231
234
|
# Already exists?...
|
232
235
|
fid=@redis.get "fscan:id:#{filename}"
|
@@ -237,10 +240,18 @@ module CodeZauker
|
|
237
240
|
@redis.set "fscan:id:#{filename}", fid
|
238
241
|
@redis.set "fscan:id2filename:#{fid}",filename
|
239
242
|
else
|
240
|
-
|
241
|
-
|
243
|
+
# ADD MD5 Checksum
|
244
|
+
#Digest::MD5.hexdigest("aaa")
|
245
|
+
fileDigest = Digest::MD5.hexdigest(File.read(filename))
|
246
|
+
storedDigest=@redis.get("cz:md5:#{filename}")
|
247
|
+
if(fileDigest!=storedDigest)
|
248
|
+
puts "#{filename} CHANGED...MD5: #{fileDigest} REINDEXING..."
|
249
|
+
self.remove([filename])
|
250
|
+
else
|
251
|
+
## puts "#{filename} id:#{fid} MD% UP TO DATE and NOT RELOADED"
|
242
252
|
return nil
|
243
253
|
end
|
254
|
+
|
244
255
|
end
|
245
256
|
# fid is the set key!...
|
246
257
|
trigramScanned=0
|
@@ -256,7 +267,7 @@ module CodeZauker
|
|
256
267
|
|
257
268
|
lines.each do |lineNotUTF8|
|
258
269
|
l= util.ensureUTF8(lineNotUTF8)
|
259
|
-
# Split each line into
|
270
|
+
# Split each line into GRAM_SIZE-char chunks, and store in a redis set
|
260
271
|
i=0
|
261
272
|
for istart in 0...(l.length-GRAM_SIZE)
|
262
273
|
trigram = l[istart, GRAM_SIZE]
|
@@ -271,7 +282,7 @@ module CodeZauker
|
|
271
282
|
s=Set.new()
|
272
283
|
end
|
273
284
|
trigramScanned += 1
|
274
|
-
#puts "#{istart}
|
285
|
+
#puts "#{istart} Gram fscan:#{trigram}/ FileId: #{fid}"
|
275
286
|
end
|
276
287
|
end
|
277
288
|
|
@@ -287,8 +298,13 @@ module CodeZauker
|
|
287
298
|
@redis.sadd "fscan:processedFiles", "#{filename}"
|
288
299
|
trigramRatio=( (trigramsOnFile*1.0) / trigramScanned )* 100.0
|
289
300
|
if trigramRatio < 10 or trigramRatio >75
|
290
|
-
puts "#{filename}\n\tRatio:#{trigramRatio.round}% Unique
|
301
|
+
puts "#{filename}\n\tRatio:#{trigramRatio.round}% Unique #{GRAM_SIZE}-grams:#{trigramsOnFile} Total Scanned: #{trigramScanned} ?Binary" if trigramRatio >90 and trigramsOnFile>70
|
291
302
|
end
|
303
|
+
|
304
|
+
# Register digest...do at last for better security
|
305
|
+
fileDigest = Digest::MD5.hexdigest(File.read(filename))
|
306
|
+
@redis.set("cz:md5:#{filename}",fileDigest)
|
307
|
+
|
292
308
|
return nil
|
293
309
|
end
|
294
310
|
|
@@ -344,11 +360,11 @@ module CodeZauker
|
|
344
360
|
# YourAppManager
|
345
361
|
def wsearch(term)
|
346
362
|
# Split stuff
|
347
|
-
puts "Wild Search request:#{term}"
|
363
|
+
#puts "Wild Search request:#{term}"
|
348
364
|
m=term.split("*")
|
349
365
|
if m.length>0
|
350
366
|
trigramInAnd=Set.new()
|
351
|
-
puts "*= Found:#{m.length}"
|
367
|
+
#puts "*= Found:#{m.length}"
|
352
368
|
m.each do | wtc |
|
353
369
|
wt=wtc.downcase()
|
354
370
|
#puts "Splitting #{wt}"
|
@@ -386,7 +402,7 @@ module CodeZauker
|
|
386
402
|
#puts "Reindexing... #{fileList.length} files..."
|
387
403
|
fileList.each do |current_file |
|
388
404
|
self.remove([current_file])
|
389
|
-
self.load(current_file
|
405
|
+
self.load(current_file)
|
390
406
|
end
|
391
407
|
end
|
392
408
|
|
data/lib/code_zauker/cli.rb
CHANGED
@@ -79,8 +79,8 @@ module CodeZauker
|
|
79
79
|
min=trigramsOnFile if trigramsOnFile <min and trigramsOnFile>0
|
80
80
|
end
|
81
81
|
av=sum/count
|
82
|
-
puts "Average
|
83
|
-
tagCharSize=max/
|
82
|
+
puts "Average -grams per file:#{av} Min: #{min} Max: #{max}"
|
83
|
+
tagCharSize=max/20
|
84
84
|
#tagCharSize=max/10 if tagCharSize>80
|
85
85
|
puts "Graphic summary... +=#{tagCharSize}"
|
86
86
|
ids.each do | fid |
|
@@ -88,7 +88,7 @@ module CodeZauker
|
|
88
88
|
if trigramsOnFile>= (tagCharSize*3)
|
89
89
|
fname=redis.get("fscan:id2filename:#{fid}")
|
90
90
|
bar="+"*(trigramsOnFile/tagCharSize)
|
91
|
-
puts "#{bar} #{fname}"
|
91
|
+
puts "#{trigramsOnFile} #{bar} #{fname}"
|
92
92
|
end
|
93
93
|
end
|
94
94
|
|
data/lib/code_zauker/grep.rb
CHANGED
File without changes
|
data/lib/code_zauker/version.rb
CHANGED
data/lib/code_zauker/webgui.rb
CHANGED
File without changes
|
data/readme.org
CHANGED
@@ -4,6 +4,23 @@ Code Zauker is based from ideas taken by old Google Code Search and uses Redis a
|
|
4
4
|
|
5
5
|
For news and discussion: http://gioorgi.com/tag/code-zauker/
|
6
6
|
|
7
|
+
* NEWS
|
8
|
+
** And a happy coding year! 2013
|
9
|
+
Version 0.1.0 sports an auto-reindexing system, 3-gram size and a new command, czlist.
|
10
|
+
CodeZauker will store a MD5 checksum for every file and reindex automatically changed files.
|
11
|
+
There is a small API change: FileScanner>>load will no longer accept noReload because it will
|
12
|
+
automatically use md5 to understand if a reindex is needed.
|
13
|
+
Anyway, client code should rely on FileScanner>>reindex to force a reindex of a file.
|
14
|
+
This feature is still beta but works pretty well.
|
15
|
+
|
16
|
+
When a lot of reindexing is involved, performance can drop hard under 2 files per sec
|
17
|
+
|
18
|
+
Version 0.1.0 also sports a new command, /czlist/, which simplifies integration with the unix tool-chain.
|
19
|
+
czlist accesses the code zauker core directly to show only the filenames
|
20
|
+
which could contain the search string.
|
21
|
+
czlist is ideal for IDE integration
|
22
|
+
|
23
|
+
|
7
24
|
|
8
25
|
* INSTALL
|
9
26
|
To install Code Zauker,simply issue
|
@@ -70,7 +87,7 @@ and enjoy!
|
|
70
87
|
* MS-Windows Compatibility
|
71
88
|
Grab your windows redis server at
|
72
89
|
https://github.com/dmajkic/redis/downloads
|
73
|
-
Version 0.0.9 has been
|
90
|
+
Versions 0.0.9 and 0.1.0 have been successfully tested with the Redis 2.4.5 32-bit version
|
74
91
|
You will find a
|
75
92
|
redis-win.conf example
|
76
93
|
to give you a fast-startup
|
@@ -79,6 +96,9 @@ to give you a fast-startup
|
|
79
96
|
* Release History
|
80
97
|
| Version | Date | Summary |
|
81
98
|
|---------+-------------+-------------------------------------------------------------------------------|
|
99
|
+
| 0.1.0 | | Added czlist command which supersedes czsearch. |
|
100
|
+
| | | Czindexer now sports a better auto reindexing feature |
|
101
|
+
| | | Removed hiredis dependency for easier installation under ms-windows |
|
82
102
|
| 0.0.9 | 12 Oct 2012 | Removed case sensitive backend to improve space use. Er Zauker Compatibility. |
|
83
103
|
| | | Tested on MSWin |
|
84
104
|
| 0.0.8 | 04 Jun 2012 | Wildcard (*) search/better error handling of missed files/indexchecker |
|
@@ -90,9 +110,11 @@ to give you a fast-startup
|
|
90
110
|
| 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
|
91
111
|
| 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
|
92
112
|
|
93
|
-
|
94
|
-
|
113
|
+
|
95
114
|
* DEVELOPING
|
96
115
|
For developing with Code Zauker you need bundler 1.0.21 or above
|
97
|
-
See devel.org file
|
116
|
+
See devel.org file for more information
|
117
|
+
|
118
|
+
* KNOWN BUGS / LIMITATIONS
|
119
|
+
At the time of writing, indexing emacs-lisp files is a very slow task.
|
98
120
|
|
data/templates/search.erb
CHANGED
File without changes
|
data/templates/show_results.erb
CHANGED
File without changes
|
File without changes
|
data/test/fixture/foolish.txt
CHANGED
File without changes
|
data/test/fixture/kurukku.txt
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/test/fixture/wildtest.txt
CHANGED
File without changes
|
data/test/test_pdf_indexing.rb
CHANGED
File without changes
|
data/test/test_search.rb
CHANGED
@@ -23,7 +23,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
23
23
|
|
24
24
|
def test_scanner_trigram_simple
|
25
25
|
fs=CodeZauker::FileScanner.new()
|
26
|
-
fs.load("./readme.org"
|
26
|
+
fs.load("./readme.org")
|
27
27
|
fs.load("./test/fixture/kurukku.txt")
|
28
28
|
files=fs.search("kku")
|
29
29
|
assert (files[0].include?("fixture/kurukku.txt")==true)
|
@@ -66,14 +66,14 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
66
66
|
|
67
67
|
def test_very_big_file
|
68
68
|
fs=CodeZauker::FileScanner.new()
|
69
|
-
fs.load("./test/fixture/TEST_LICENSE.txt"
|
69
|
+
fs.load("./test/fixture/TEST_LICENSE.txt")
|
70
70
|
files=fs.search('"Commercial Use"')
|
71
71
|
assert files.include?("./test/fixture/TEST_LICENSE.txt")==true
|
72
72
|
end
|
73
73
|
|
74
74
|
def test_remove
|
75
75
|
fs=CodeZauker::FileScanner.new()
|
76
|
-
fs.load("./test/fixture/kurukku.txt"
|
76
|
+
fs.load("./test/fixture/kurukku.txt")
|
77
77
|
fs.remove(["./test/fixture/kurukku.txt"])
|
78
78
|
files=fs.search("\"Be hungry, be foolish\"")
|
79
79
|
assert files.length ==0,
|
@@ -86,7 +86,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
86
86
|
# require 'redis'
|
87
87
|
# redis=Redis.new
|
88
88
|
# fs=CodeZauker::FileScanner.new(redis)
|
89
|
-
# fs.load("./test/fixture/kurukku.txt"
|
89
|
+
# fs.load("./test/fixture/kurukku.txt")
|
90
90
|
# fs.removeAll()
|
91
91
|
# foundKeys=redis.keys "*"
|
92
92
|
# #puts "Keys at empty db:#{foundKeys}"
|
@@ -96,14 +96,14 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
96
96
|
# # 2012 Jan 30 New Case Insensitive Test cases
|
97
97
|
def test_case_insensitive1
|
98
98
|
fs=CodeZauker::FileScanner.new()
|
99
|
-
fs.load("./test/fixture/kurukku.txt"
|
99
|
+
fs.load("./test/fixture/kurukku.txt")
|
100
100
|
flist=fs.isearch("caseinsensitive Search TEST.")
|
101
101
|
assert flist.include?("./test/fixture/kurukku.txt"), "Case insensitive search failed. #{flist}"
|
102
102
|
end
|
103
103
|
|
104
104
|
def test_case_insensitive2
|
105
105
|
fs=CodeZauker::FileScanner.new()
|
106
|
-
fs.load("./test/fixture/kurukku.txt"
|
106
|
+
fs.load("./test/fixture/kurukku.txt")
|
107
107
|
flist=fs.isearch("caSeinsenSitive Search TEST.")
|
108
108
|
assert flist.include?("./test/fixture/kurukku.txt"), "Case insensitive search failed. #{flist}"
|
109
109
|
assert fs.search("CASeinsenSitivE").include?("./test/fixture/kurukku.txt"), "Search must be always insensitive"
|
@@ -111,7 +111,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
|
|
111
111
|
|
112
112
|
def test_case_insensitive3
|
113
113
|
fs=CodeZauker::FileScanner.new()
|
114
|
-
fs.load("./test/fixture/kurukku.txt"
|
114
|
+
fs.load("./test/fixture/kurukku.txt")
|
115
115
|
u=CodeZauker::Util.new()
|
116
116
|
(u.mixCase("CaSeinsen")).each { |t|
|
117
117
|
#puts "Checking #{t}"
|
data/test/test_wild_search.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: code_zauker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-05-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yard
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0.7'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.7'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rubyzip
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ~>
|
@@ -32,21 +37,15 @@ dependencies:
|
|
32
37
|
version: '0.9'
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: hiredis
|
38
|
-
requirement: &79399600 !ruby/object:Gem::Requirement
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
41
|
none: false
|
40
42
|
requirements:
|
41
43
|
- - ~>
|
42
44
|
- !ruby/object:Gem::Version
|
43
|
-
version: '0.
|
44
|
-
type: :runtime
|
45
|
-
prerelease: false
|
46
|
-
version_requirements: *79399600
|
45
|
+
version: '0.9'
|
47
46
|
- !ruby/object:Gem::Dependency
|
48
47
|
name: redis
|
49
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
50
49
|
none: false
|
51
50
|
requirements:
|
52
51
|
- - ~>
|
@@ -54,10 +53,15 @@ dependencies:
|
|
54
53
|
version: '2.2'
|
55
54
|
type: :runtime
|
56
55
|
prerelease: false
|
57
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.2'
|
58
62
|
- !ruby/object:Gem::Dependency
|
59
63
|
name: pdf-reader
|
60
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
61
65
|
none: false
|
62
66
|
requirements:
|
63
67
|
- - ~>
|
@@ -65,10 +69,15 @@ dependencies:
|
|
65
69
|
version: 1.0.0
|
66
70
|
type: :runtime
|
67
71
|
prerelease: false
|
68
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.0.0
|
69
78
|
- !ruby/object:Gem::Dependency
|
70
79
|
name: sinatra
|
71
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
72
81
|
none: false
|
73
82
|
requirements:
|
74
83
|
- - ~>
|
@@ -76,10 +85,15 @@ dependencies:
|
|
76
85
|
version: '1.3'
|
77
86
|
type: :runtime
|
78
87
|
prerelease: false
|
79
|
-
version_requirements:
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.3'
|
80
94
|
- !ruby/object:Gem::Dependency
|
81
95
|
name: redis_logger
|
82
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
83
97
|
none: false
|
84
98
|
requirements:
|
85
99
|
- - ~>
|
@@ -87,13 +101,19 @@ dependencies:
|
|
87
101
|
version: '0.1'
|
88
102
|
type: :runtime
|
89
103
|
prerelease: false
|
90
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.1'
|
91
110
|
description: Code Zauker is based from ideas taken by old Google Code Search and uses
|
92
111
|
Redis as a basic platform
|
93
112
|
email:
|
94
113
|
- jj@gioorgi.com
|
95
114
|
executables:
|
96
115
|
- czindexer
|
116
|
+
- czlist
|
97
117
|
- czsearch
|
98
118
|
- mczindexer
|
99
119
|
- report.rb
|
@@ -108,6 +128,7 @@ files:
|
|
108
128
|
- LICENSE.txt
|
109
129
|
- Rakefile
|
110
130
|
- bin/czindexer
|
131
|
+
- bin/czlist
|
111
132
|
- bin/czsearch
|
112
133
|
- bin/mczindexer
|
113
134
|
- bin/report.rb
|
@@ -184,7 +205,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
205
|
version: '0'
|
185
206
|
requirements: []
|
186
207
|
rubyforge_project: code_zauker
|
187
|
-
rubygems_version: 1.8.
|
208
|
+
rubygems_version: 1.8.24
|
188
209
|
signing_key:
|
189
210
|
specification_version: 3
|
190
211
|
summary: A search engine for programming languages
|