code_zauker 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +0 -0
  2. data/BUGS.org +6 -0
  3. data/Gemfile +0 -0
  4. data/LICENSE.txt +0 -0
  5. data/Rakefile +0 -0
  6. data/bin/czindexer +2 -2
  7. data/bin/czlist +122 -0
  8. data/bin/czsearch +2 -164
  9. data/bin/mczindexer +0 -0
  10. data/bin/report.rb +0 -1
  11. data/bin/startRedis +0 -0
  12. data/bin/webgui +0 -0
  13. data/code_zauker.gemspec +1 -1
  14. data/devel.org +50 -0
  15. data/doc/CodeZauker.html +0 -0
  16. data/doc/CodeZauker/CliUtil.html +0 -0
  17. data/doc/CodeZauker/FileScanner.html +0 -0
  18. data/doc/CodeZauker/IndexManager.html +0 -0
  19. data/doc/CodeZauker/Util.html +0 -0
  20. data/doc/Grep.html +0 -0
  21. data/doc/_index.html +0 -0
  22. data/doc/class_list.html +0 -0
  23. data/doc/css/common.css +0 -0
  24. data/doc/css/full_list.css +0 -0
  25. data/doc/css/style.css +0 -0
  26. data/doc/file_list.html +0 -0
  27. data/doc/frames.html +0 -0
  28. data/doc/index.html +0 -0
  29. data/doc/js/app.js +0 -0
  30. data/doc/js/full_list.js +0 -0
  31. data/doc/js/jquery.js +0 -0
  32. data/doc/method_list.html +0 -0
  33. data/doc/top-level-namespace.html +0 -0
  34. data/etc/redis-win.conf +2 -2
  35. data/etc/redis.conf +0 -0
  36. data/htdocs/CodeZauker.gif +0 -0
  37. data/htdocs/Gioorgi.gif +0 -0
  38. data/htdocs/css/bootstrap-responsive.css +0 -0
  39. data/htdocs/css/bootstrap-responsive.min.css +0 -0
  40. data/htdocs/css/bootstrap.css +0 -0
  41. data/htdocs/css/bootstrap.min.css +0 -0
  42. data/htdocs/img/glyphicons-halflings-white.png +0 -0
  43. data/htdocs/img/glyphicons-halflings.png +0 -0
  44. data/htdocs/js/bootstrap.js +0 -0
  45. data/htdocs/js/bootstrap.min.js +0 -0
  46. data/lib/code_zauker.rb +29 -13
  47. data/lib/code_zauker/cli.rb +3 -3
  48. data/lib/code_zauker/constants.rb +2 -2
  49. data/lib/code_zauker/grep.rb +0 -0
  50. data/lib/code_zauker/version.rb +1 -1
  51. data/lib/code_zauker/webgui.rb +0 -0
  52. data/readme.org +26 -4
  53. data/templates/search.erb +0 -0
  54. data/templates/show_results.erb +0 -0
  55. data/test/fixture/TEST_LICENSE.txt +0 -0
  56. data/test/fixture/foolish.txt +0 -0
  57. data/test/fixture/kurukku.txt +0 -0
  58. data/test/fixture/simple_test.pdf +0 -0
  59. data/test/fixture/testArchive.zip +0 -0
  60. data/test/fixture/wildtest.txt +0 -0
  61. data/test/test_pdf_indexing.rb +0 -0
  62. data/test/test_search.rb +7 -7
  63. data/test/test_wild_search.rb +0 -0
  64. metadata +43 -22
data/.gitignore CHANGED
File without changes
data/BUGS.org CHANGED
@@ -2,3 +2,9 @@
2
2
  * Bug 001 :wontfix_soon:
3
3
  Indexing a 700Kb gem take too much time, it seems looping
4
4
  Avoid indexing gem file for the meantime.
5
+ * Bug 002 :limitation:
6
+ Reindexing does not work very well. Code Zauker will not automatically detect that a file has changed.
7
+ Implement MD5 checksum support so that changed files can be reindexed quickly
8
+
9
+ * Bug 003 :low:
10
+ Avoid the keys() command because it is deprecated for normal usage scenarios
data/Gemfile CHANGED
File without changes
File without changes
data/Rakefile CHANGED
File without changes
@@ -3,7 +3,7 @@
3
3
  # find test/fixture/ -type f | xargs -P 5 -n 10 ./bin/czindexer
4
4
  # will fire 5 czindexer each with 10 files to process...
5
5
  require 'code_zauker/cli'
6
- require 'redis/connection/hiredis'
6
+ #require 'redis/connection/hiredis'
7
7
  require 'redis'
8
8
  require 'optparse'
9
9
  options={}
@@ -86,7 +86,7 @@ def processElement(l,fs,options)
86
86
  if options[:reindex] == true
87
87
  fs.reindex([l])
88
88
  else
89
- fs.load(l,noReload=true)
89
+ fs.load(l)
90
90
  end
91
91
  timeTaken=Time.now-startTime
92
92
  $PROCESSED_FILES+=1
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ #== czlist list only the files which can contain the required string
3
+ # should be combined with "xargs grep/egrep"
4
+ # Ideal for ide integration
5
+ # Simpler than czsearch
6
+ $VERBOSE=nil
7
+ require 'code_zauker'
8
+ require 'code_zauker/cli'
9
+ #require 'redis/connection/hiredis'
10
+ require 'redis'
11
+ #include Grep
12
+
13
+ require 'optparse'
14
+ options={}
15
+ optparse= OptionParser.new do |opts|
16
+ opts.banner="Usage: czlist [options] [term1] [term2]..."
17
+ options[:extensions_to_ignore]=[]
18
+ options[:file_to_exclude]=[]
19
+ options[:redis_host]="127.0.0.1"
20
+ options[:redis_port]=6379
21
+ options[:redis_password]=nil
22
+ options[:be_wild]=true
23
+
24
+
25
+
26
+ opts.on('-X','--exclude FILE_PATTERN',String,
27
+ 'Exclude files that match FILE_PATTERN (as ruby regexp). Case insensitive') do |p|
28
+ options[:file_to_exclude].push(/#{Regexp.escape(p)}/i);
29
+ end
30
+
31
+ opts.on('-w','--wild','Do a wildcharacter search. * means "every char". True by default') do
32
+ options[:be_wild] = true
33
+ options[:ignorecase]=true
34
+ end
35
+
36
+ opts.on('-e','--exact','Disable wild search Userful if you need to search * or exact matches ') do
37
+ options[:be_wild] = false
38
+ options[:ignorecase]=true
39
+ end
40
+
41
+
42
+ opts.on('--redis-server pass@SERVER:port', String,
43
+ 'Specify the alternate redis server to use')do |server|
44
+ myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
45
+ options[:redis_host]=myoptions[:redis_host]
46
+ options[:redis_port]=myoptions[:redis_port]
47
+ options[:redis_password]=myoptions[:redis_password]
48
+
49
+ if options[:redis_password]
50
+ #puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
51
+ else
52
+ #puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
53
+ end
54
+ end
55
+
56
+
57
+
58
+ opts.on( '-h', '--help', 'Display this screen' ) do
59
+ puts opts
60
+ puts "EXAMPLES:"
61
+ puts "czlist for"
62
+ puts " Will search for loops and return the file smatching it"
63
+ puts "czlist -w 'public*class School'"
64
+ puts " Will seach for a java class called School ignoring characters between public and class."
65
+ puts "czlist for | xargs grep for"
66
+ puts " will be quite the same of czsearch but faster."
67
+ puts "Search is always case insensitive and wild by default"
68
+ exit
69
+ end
70
+ end
71
+ optparse.parse!
72
+
73
+ ARGV.each do | s |
74
+ #puts "Code Zauker Searching for #{s}"
75
+ util=CodeZauker::Util.new()
76
+ redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
77
+ fs=CodeZauker::FileScanner.new(redisConnection)
78
+
79
+ if options[:be_wild]==true
80
+ cli=CodeZauker::CliUtil.new()
81
+ r=cli.doWildSearch(s,fs)
82
+ files= r[:files]
83
+ pattern=r[:regexp]
84
+ else
85
+ # It uses always isearch
86
+ # and delegates to the grep subsystem to find it out
87
+ files=fs.isearch(s)
88
+ end
89
+
90
+ files.each do |f|
91
+ to_exclude=false
92
+ if options[:file_to_exclude].length >0
93
+ # Will match?
94
+ to_exclude=false
95
+ options[:file_to_exclude].each do |pattern|
96
+ #puts "\n\t#{f} =~ #{pattern}"
97
+ if (f =~ pattern )
98
+ to_exclude=true
99
+ #puts "Excluded #{f}"
100
+ break
101
+ end
102
+ end
103
+ end
104
+
105
+ # Does it exist?
106
+ if !to_exclude && !File.exists?(f)
107
+ #puts "WARN: Not FOUND #{f}"
108
+ to_exclude=true
109
+ end
110
+
111
+ if !to_exclude
112
+ begin
113
+ puts "#{f}"
114
+ rescue ArgumentError => ioe
115
+ puts "FATAL ArgumentError on #{f}"
116
+ raise ioe
117
+ end
118
+ else
119
+
120
+ end
121
+ end
122
+ end
@@ -1,164 +1,2 @@
1
- #!/usr/bin/env ruby
2
- #== czsearch is a userful command to search via the Code Zauker facility
3
- # Send something like -W0 to ruby, for a cleaner output
4
- $VERBOSE=nil
5
- require 'code_zauker'
6
- require 'code_zauker/grep'
7
- require 'code_zauker/cli'
8
- require 'redis/connection/hiredis'
9
- require 'redis'
10
- require 'tempfile'
11
- require 'pdf/reader'
12
- include Grep
13
-
14
- require 'optparse'
15
- options={}
16
- optparse= OptionParser.new do |opts|
17
- opts.banner="Usage: czsearch [options] [term1] [term2]..."
18
- options[:ignorecase]=false
19
- options[:precontext]=0
20
- options[:postcontext]=0
21
- options[:extensions_to_ignore]=[]
22
- options[:file_to_exclude]=[]
23
- options[:redis_host]="127.0.0.1"
24
- options[:redis_port]=6379
25
- options[:redis_password]=nil
26
- options[:be_wild]=false
27
-
28
- opts.on('-i', '--ignore-case','ignore case distinctions') do
29
- options[:ignorecase]=true
30
- end
31
-
32
- opts.on('-B', '--before-context NUM', Integer, 'print NUM lines of leading context') do | c |
33
- options[:precontext]=c
34
- end
35
-
36
- opts.on('-A','--after-context NUM',Integer,'print NUM lines of trailing context') do | c |
37
- options[:postcontext]=c
38
- end
39
- opts.on('-C','--context NUM',Integer,'print NUM lines of output context') do | c |
40
- if c>0
41
- options[:postcontext]=c
42
- options[:precontext]=options[:postcontext]
43
- end
44
- end
45
-
46
-
47
- opts.on('-X','--exclude FILE_PATTERN',String,
48
- 'Exclude files that match FILE_PATTERN (as ruby regexp). Case insensitive') do |p|
49
- options[:file_to_exclude].push(/#{Regexp.escape(p)}/i);
50
- end
51
-
52
- opts.on('-w','--wild','Do a wildcharacter search. * means "every char". Imply -i') do
53
- options[:be_wild] = true
54
- options[:ignorecase]=true
55
- end
56
-
57
-
58
- opts.on('-h','--redis-server pass@SERVER:port', String,
59
- 'Specify the alternate redis server to use')do |server|
60
- myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
61
- options[:redis_host]=myoptions[:redis_host]
62
- options[:redis_port]=myoptions[:redis_port]
63
- options[:redis_password]=myoptions[:redis_password]
64
-
65
- if options[:redis_password]
66
- puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
67
- else
68
- puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
69
- end
70
- end
71
-
72
-
73
-
74
- opts.on( '-h', '--help', 'Display this screen' ) do
75
- puts opts
76
- puts "EXAMPLES:"
77
- puts "czsearch ciao Koros"
78
- puts " Will search Koros OR ciao"
79
- puts "czsearch -i gnu"
80
- puts " Will match also GNU and Gnu"
81
- puts "czsearch -X .orig -X .bak -X .java html:select"
82
- puts " Will skip java and backup file"
83
- puts "czsearch -w 'public*class School'"
84
- puts " Will seach for a java class called School ignoring characters between public and class."
85
- exit
86
- end
87
- end
88
- optparse.parse!
89
-
90
- ARGV.each do | s |
91
- #puts "Code Zauker Searching for #{s}"
92
- util=CodeZauker::Util.new()
93
- redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
94
- fs=CodeZauker::FileScanner.new(redisConnection)
95
-
96
- if options[:be_wild]==true
97
- puts "Wild MODE"
98
- cli=CodeZauker::CliUtil.new()
99
- r=cli.doWildSearch(s,fs)
100
- files= r[:files]
101
- pattern=r[:regexp]
102
- else
103
- # It uses always isearch
104
- # and delegates to the grep subsystem to find it out
105
- files=fs.isearch(s)
106
- if options[:ignorecase]==false
107
- pattern=/#{Regexp.escape(s)}/
108
- else
109
- pattern=/#{Regexp.escape(s)}/i
110
- end
111
- end
112
-
113
- files.each do |f|
114
- to_exclude=false
115
- if options[:file_to_exclude].length >0
116
- # Will match?
117
- to_exclude=false
118
- options[:file_to_exclude].each do |pattern|
119
- #puts "\n\t#{f} =~ #{pattern}"
120
- if (f =~ pattern )
121
- to_exclude=true
122
- #puts "Excluded #{f}"
123
- break
124
- end
125
- end
126
- end
127
-
128
- # Does it exist?
129
- if !to_exclude && !File.exists?(f)
130
- #puts "WARN: Not FOUND #{f}"
131
- to_exclude=true
132
- end
133
-
134
- if !to_exclude
135
- begin
136
- if util.is_pdf?(f)==false
137
- lines=grep(f,pattern, pre_context=options[:precontext], post_context=options[:postcontext]);
138
- lines.each do |l |
139
- puts "#{f}:#{l}"
140
- end
141
- else
142
- puts "#{f} Pdf matches"
143
- #Using pdf/reader we can do a search here but we must store the stuff
144
- # in a temp file
145
-
146
- tempfile =Tempfile.new("czsearch_pdf.tmp")
147
- tempfile.write(util.get_lines(f).join("\n"))
148
- tempfile.close
149
- #puts "Temp PDF into #{tempfile.path}"
150
- lines=grep(tempfile.path,pattern, pre_context=options[:precontext], post_context=options[:postcontext]);
151
- lines.each do |l |
152
- puts "#{f}:#{l}"
153
- end
154
- tempfile.unlink
155
- end
156
- rescue ArgumentError => ioe
157
- puts "FATAL ArgumentError on #{f}"
158
- raise ioe
159
- end
160
- else
161
-
162
- end
163
- end
164
- end
1
+ #!/bin/bash
2
+ czlist --redis-server 10.0.2.2:6380 $* | xargs grep $*
File without changes
@@ -3,7 +3,6 @@ require 'set'
3
3
  require 'code_zauker'
4
4
  require 'code_zauker/grep'
5
5
  require 'code_zauker/cli'
6
- require 'redis/connection/hiredis'
7
6
  require 'redis'
8
7
  require 'tempfile'
9
8
  require 'pdf/reader'
File without changes
data/bin/webgui CHANGED
File without changes
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
23
23
  s.add_development_dependency "yard", "~>0.7"
24
24
  s.add_development_dependency "rubyzip", "~> 0.9"
25
25
 
26
- s.add_runtime_dependency "hiredis", "~> 0.3"
26
+ ## s.add_runtime_dependency "hiredis", "~> 0.3"
27
27
  s.add_runtime_dependency "redis", "~> 2.2"
28
28
  s.add_runtime_dependency "pdf-reader", "~> 1.0.0"
29
29
  s.add_runtime_dependency "sinatra", "~> 1.3"
data/devel.org CHANGED
@@ -1,3 +1,53 @@
1
+ * Basic commands
2
+ Make sure to uninstall code zauker from your gems (gem uninstall code_zauker)
3
+ before you start developing
4
+ ** Environment setup (windows)
5
+
6
+ Ensure you have Dev kit too
7
+ http://rubyinstaller.org/downloads
8
+ https://github.com/oneclick/rubyinstaller/wiki/Development-Kit
9
+ It is for hiredis: hiredis is not mandatory, but suggested
10
+
11
+
12
+ #+begin_src sh
13
+ gem install bundler
14
+ # Dev kit installation...
15
+ #ruby /c/rubyinstallkit/dk.rb init
16
+ #ruby /c/rubyinstallkit/dk.rb install
17
+ bundle install
18
+ rake test
19
+ # Ensure dev code is reachable
20
+ export RUBYLIB=k:/code/code_zauker/lib
21
+ #+end_src
22
+
23
+
24
+ ** To Run tests
25
+ #+begin_src sh
26
+ rake test
27
+ #+end_src
28
+
29
+ ** To release a new version to rubygem
30
+ #+begin_src sh
31
+ rake release
32
+ #+end_src
33
+
34
+ ** Dependency management
35
+ Done with ruby "bundle"; you should periodically check the dependencies
36
+ with "bundle update" to be sure you have the latest bug fixes of the dependent libs
37
+
38
+ * Notable facts
39
+ ** DB Size tradeoff
40
+ If the gram size is greater than 3, the database becomes larger, because of fewer collisions.
41
+ czlist works better with 4-grams than with 3-grams (far fewer false positives)
42
+ but the size can be 50% bigger
43
+
44
+ 2-gram size rocks a lot, because of a very small db, but false positives are a nightmare.
45
+ czlist gives 2188 files with a "for", but grep reports only 383 of them (less than 18% success)
46
+
47
+
48
+ Emacs-lisp files sport a huge number of trigrams
49
+
50
+
1
51
  * Future/Study
2
52
  To fulfill Google code options:
3
53
  ** Google code input
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -22,9 +22,9 @@ pidfile C:/TEMP/codezauker_redis.pid
22
22
 
23
23
  # Accept connections on the specified port, default is 6379.
24
24
  # If port 0 is specified Redis will not listen on a TCP socket.
25
- #port 6379
25
+ port 6379
26
26
  # Another port if you run a VM like me
27
- port 6380
27
+ #port 6380
28
28
 
29
29
  # If you want you can bind a single interface, if the bind option is not
30
30
  # specified all the interfaces will listen for incoming connections.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -2,12 +2,15 @@
2
2
  require "code_zauker/version"
3
3
  require "code_zauker/constants"
4
4
  require 'code_zauker/grep'
5
- require 'redis/connection/hiredis'
5
+ # require 'redis/connection/hiredis'
6
6
  require 'redis'
7
7
  require 'set'
8
8
  require 'pdf/reader'
9
9
  require 'date'
10
10
 
11
+ #require 'digest'
12
+ require 'digest/md5'
13
+
11
14
  # This module implements a simple reverse indexer
12
15
  # based on Redis
13
16
  # The idea is ispired by http://swtch.com/~rsc/regexp/regexp4.html
@@ -199,9 +202,9 @@ module CodeZauker
199
202
  end
200
203
  end
201
204
  end
202
- if showlog
203
- puts " <Pushed #{s.length}..."
204
- end
205
+ # if showlog
206
+ # puts " <Pushed #{s.length}..."
207
+ # end
205
208
  puts "WARN: Some invalid UTF-8 char on #{filename} Case insensitive search will be compromised" if case_insensitive_trigram_failed
206
209
  end
207
210
 
@@ -226,7 +229,7 @@ module CodeZauker
226
229
  private :pushTrigramsSetRecoverable
227
230
 
228
231
 
229
- def load(filename, noReload=false)
232
+ def load(filename)
230
233
  # Define my redis id...
231
234
  # Already exists?...
232
235
  fid=@redis.get "fscan:id:#{filename}"
@@ -237,10 +240,18 @@ module CodeZauker
237
240
  @redis.set "fscan:id:#{filename}", fid
238
241
  @redis.set "fscan:id2filename:#{fid}",filename
239
242
  else
240
- if noReload
241
- #puts "Already found #{filename} as id:#{fid} and NOT RELOADED"
243
+ # ADD MD5 Checksum
244
+ #Digest::MD5.hexdigest("aaa")
245
+ fileDigest = Digest::MD5.hexdigest(File.read(filename))
246
+ storedDigest=@redis.get("cz:md5:#{filename}")
247
+ if(fileDigest!=storedDigest)
248
+ puts "#{filename} CHANGED...MD5: #{fileDigest} REINDEXING..."
249
+ self.remove([filename])
250
+ else
251
+ ## puts "#{filename} id:#{fid} MD% UP TO DATE and NOT RELOADED"
242
252
  return nil
243
253
  end
254
+
244
255
  end
245
256
  # fid is the set key!...
246
257
  trigramScanned=0
@@ -256,7 +267,7 @@ module CodeZauker
256
267
 
257
268
  lines.each do |lineNotUTF8|
258
269
  l= util.ensureUTF8(lineNotUTF8)
259
- # Split each line into 3-char chunks, and store in a redis set
270
+ # Split each line into GRAM_SIZE-char chunks, and store in a redis set
260
271
  i=0
261
272
  for istart in 0...(l.length-GRAM_SIZE)
262
273
  trigram = l[istart, GRAM_SIZE]
@@ -271,7 +282,7 @@ module CodeZauker
271
282
  s=Set.new()
272
283
  end
273
284
  trigramScanned += 1
274
- #puts "#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}"
285
+ #puts "#{istart} Gram fscan:#{trigram}/ FileId: #{fid}"
275
286
  end
276
287
  end
277
288
 
@@ -287,8 +298,13 @@ module CodeZauker
287
298
  @redis.sadd "fscan:processedFiles", "#{filename}"
288
299
  trigramRatio=( (trigramsOnFile*1.0) / trigramScanned )* 100.0
289
300
  if trigramRatio < 10 or trigramRatio >75
290
- puts "#{filename}\n\tRatio:#{trigramRatio.round}% Unique Trigrams:#{trigramsOnFile} Total Scanned: #{trigramScanned} ?Binary" if trigramRatio >90 and trigramsOnFile>70
301
+ puts "#{filename}\n\tRatio:#{trigramRatio.round}% Unique #{GRAM_SIZE}-grams:#{trigramsOnFile} Total Scanned: #{trigramScanned} ?Binary" if trigramRatio >90 and trigramsOnFile>70
291
302
  end
303
+
304
+ # Register digest...do at last for better security
305
+ fileDigest = Digest::MD5.hexdigest(File.read(filename))
306
+ @redis.set("cz:md5:#{filename}",fileDigest)
307
+
292
308
  return nil
293
309
  end
294
310
 
@@ -344,11 +360,11 @@ module CodeZauker
344
360
  # YourAppManager
345
361
  def wsearch(term)
346
362
  # Split stuff
347
- puts "Wild Search request:#{term}"
363
+ #puts "Wild Search request:#{term}"
348
364
  m=term.split("*")
349
365
  if m.length>0
350
366
  trigramInAnd=Set.new()
351
- puts "*= Found:#{m.length}"
367
+ #puts "*= Found:#{m.length}"
352
368
  m.each do | wtc |
353
369
  wt=wtc.downcase()
354
370
  #puts "Splitting #{wt}"
@@ -386,7 +402,7 @@ module CodeZauker
386
402
  #puts "Reindexing... #{fileList.length} files..."
387
403
  fileList.each do |current_file |
388
404
  self.remove([current_file])
389
- self.load(current_file,noReload=false)
405
+ self.load(current_file)
390
406
  end
391
407
  end
392
408
 
@@ -79,8 +79,8 @@ module CodeZauker
79
79
  min=trigramsOnFile if trigramsOnFile <min and trigramsOnFile>0
80
80
  end
81
81
  av=sum/count
82
- puts "Average Trigrams per file:#{av} Min: #{min} Max: #{max}"
83
- tagCharSize=max/80
82
+ puts "Average -grams per file:#{av} Min: #{min} Max: #{max}"
83
+ tagCharSize=max/20
84
84
  #tagCharSize=max/10 if tagCharSize>80
85
85
  puts "Graphic summary... +=#{tagCharSize}"
86
86
  ids.each do | fid |
@@ -88,7 +88,7 @@ module CodeZauker
88
88
  if trigramsOnFile>= (tagCharSize*3)
89
89
  fname=redis.get("fscan:id2filename:#{fid}")
90
90
  bar="+"*(trigramsOnFile/tagCharSize)
91
- puts "#{bar} #{fname}"
91
+ puts "#{trigramsOnFile} #{bar} #{fname}"
92
92
  end
93
93
  end
94
94
 
@@ -40,8 +40,8 @@ module CodeZauker
40
40
  ".mp3",".mp4",".wav",
41
41
  # Image exclusion
42
42
  ".png",".gif",".jpg",".bmp",
43
- # Temp stuff
44
- ".tmp","~",
43
+ # Temp stuff and logs
44
+ ".tmp","~",".log",".bar",
45
45
  # Oracle exports...
46
46
  ".exp"
47
47
  ]
File without changes
@@ -1,4 +1,4 @@
1
1
  module CodeZauker
2
- VERSION = "0.0.9"
2
+ VERSION = "0.1.0"
3
3
  DB_VERSION = 1
4
4
  end
File without changes
data/readme.org CHANGED
@@ -4,6 +4,23 @@ Code Zauker is based from ideas taken by old Google Code Search and uses Redis a
4
4
 
5
5
  For news and discussion: http://gioorgi.com/tag/code-zauker/
6
6
 
7
+ * NEWS
8
+ ** And an happy coding year! 2013
9
+ Version 0.1.0 sports an auto-reindexing system, 3-gram size and a new command, czlist.
10
+ CodeZauker will store a MD5 checksum for every file and reindex automatically changed files.
11
+ There is a small API change: FileScanner>>load will no longer accept noReload because it will
12
+ automatically use md5 to understand if a reindex is needed.
13
+ Anyway, client code should rely on FileScanner>>reindex to force a reindex of a file.
14
+ This feature is still beta but works pretty well.
15
+
16
+ When a lot of reindexing is involved, performance can drop sharply, to under 2 files per second.
17
+
18
+ Version 0.1.0 also sports a new command, /czlist/, which simplifies integration with the unix tool-chain.
19
+ czlist accesses the code zauker core directly to show only the filenames
20
+ which could contain the search string.
21
+ czlist is ideal for IDE integration
22
+
23
+
7
24
 
8
25
  * INSTALL
9
26
  To install Code Zauker,simply issue
@@ -70,7 +87,7 @@ and enjoy!
70
87
  * MS-Windows Compatibility
71
88
  Grab your windows redis server at
72
89
  https://github.com/dmajkic/redis/downloads
73
- Version 0.0.9 has been succesful tested with Redis 2.4.5 32bit version
90
+ Versions 0.0.9 and 0.1.0 have been successfully tested with Redis 2.4.5 32bit version
74
91
  You will find a
75
92
  redis-win.conf example
76
93
  to give you a fast-startup
@@ -79,6 +96,9 @@ to give you a fast-startup
79
96
  * Release History
80
97
  | Version | Date | Summary |
81
98
  |---------+-------------+-------------------------------------------------------------------------------|
99
+ | 0.1.0 | | Added czlist command which supersedes czsearch. |
100
+ | | | Czindex now sports a better auto reindexing feature |
101
+ | | | Removed hiredis dependency for easier installation under ms-windows |
82
102
  | 0.0.9 | 12 Oct 2012 | Removed case sensitive backend to improve space use. Er Zauker Compatibility. |
83
103
  | | | Tested on MSWin |
84
104
  | 0.0.8 | 04 Jun 2012 | Wildcard (*) search/better error handling of missed files/indexchecker |
@@ -90,9 +110,11 @@ to give you a fast-startup
90
110
  | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
91
111
  | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
92
112
 
93
-
94
-
113
+
95
114
  * DEVELOPING
96
115
  For developing with Code Zauker you need bundler 1.0.21 or above
97
- See devel.org file
116
+ See devel.org file for more information
117
+
118
+ * KNOWN BUGS / LIMITATIONS
119
+ At the time of writing, indexing emacs-lisp files is a very slow task.
98
120
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -23,7 +23,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
23
23
 
24
24
  def test_scanner_trigram_simple
25
25
  fs=CodeZauker::FileScanner.new()
26
- fs.load("./readme.org",noReload=true)
26
+ fs.load("./readme.org")
27
27
  fs.load("./test/fixture/kurukku.txt")
28
28
  files=fs.search("kku")
29
29
  assert (files[0].include?("fixture/kurukku.txt")==true)
@@ -66,14 +66,14 @@ class FileScannerBasicSearch < Test::Unit::TestCase
66
66
 
67
67
  def test_very_big_file
68
68
  fs=CodeZauker::FileScanner.new()
69
- fs.load("./test/fixture/TEST_LICENSE.txt",noReload=true)
69
+ fs.load("./test/fixture/TEST_LICENSE.txt")
70
70
  files=fs.search('"Commercial Use"')
71
71
  assert files.include?("./test/fixture/TEST_LICENSE.txt")==true
72
72
  end
73
73
 
74
74
  def test_remove
75
75
  fs=CodeZauker::FileScanner.new()
76
- fs.load("./test/fixture/kurukku.txt", noReload=true)
76
+ fs.load("./test/fixture/kurukku.txt")
77
77
  fs.remove(["./test/fixture/kurukku.txt"])
78
78
  files=fs.search("\"Be hungry, be foolish\"")
79
79
  assert files.length ==0,
@@ -86,7 +86,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
86
86
  # require 'redis'
87
87
  # redis=Redis.new
88
88
  # fs=CodeZauker::FileScanner.new(redis)
89
- # fs.load("./test/fixture/kurukku.txt", noReload=true)
89
+ # fs.load("./test/fixture/kurukku.txt")
90
90
  # fs.removeAll()
91
91
  # foundKeys=redis.keys "*"
92
92
  # #puts "Keys at empty db:#{foundKeys}"
@@ -96,14 +96,14 @@ class FileScannerBasicSearch < Test::Unit::TestCase
96
96
  # # 2012 Jan 30 New Case Insensitive Test cases
97
97
  def test_case_insensitive1
98
98
  fs=CodeZauker::FileScanner.new()
99
- fs.load("./test/fixture/kurukku.txt", noReload=true)
99
+ fs.load("./test/fixture/kurukku.txt")
100
100
  flist=fs.isearch("caseinsensitive Search TEST.")
101
101
  assert flist.include?("./test/fixture/kurukku.txt"), "Case insensitive search failed. #{flist}"
102
102
  end
103
103
 
104
104
  def test_case_insensitive2
105
105
  fs=CodeZauker::FileScanner.new()
106
- fs.load("./test/fixture/kurukku.txt", noReload=true)
106
+ fs.load("./test/fixture/kurukku.txt")
107
107
  flist=fs.isearch("caSeinsenSitive Search TEST.")
108
108
  assert flist.include?("./test/fixture/kurukku.txt"), "Case insensitive search failed. #{flist}"
109
109
  assert fs.search("CASeinsenSitivE").include?("./test/fixture/kurukku.txt"), "Search must be always insensitive"
@@ -111,7 +111,7 @@ class FileScannerBasicSearch < Test::Unit::TestCase
111
111
 
112
112
  def test_case_insensitive3
113
113
  fs=CodeZauker::FileScanner.new()
114
- fs.load("./test/fixture/kurukku.txt", noReload=true)
114
+ fs.load("./test/fixture/kurukku.txt")
115
115
  u=CodeZauker::Util.new()
116
116
  (u.mixCase("CaSeinsen")).each { |t|
117
117
  #puts "Checking #{t}"
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: code_zauker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-22 00:00:00.000000000 Z
12
+ date: 2013-05-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yard
16
- requirement: &79400240 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0.7'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *79400240
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.7'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rubyzip
27
- requirement: &79399900 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ~>
@@ -32,21 +37,15 @@ dependencies:
32
37
  version: '0.9'
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *79399900
36
- - !ruby/object:Gem::Dependency
37
- name: hiredis
38
- requirement: &79399600 !ruby/object:Gem::Requirement
40
+ version_requirements: !ruby/object:Gem::Requirement
39
41
  none: false
40
42
  requirements:
41
43
  - - ~>
42
44
  - !ruby/object:Gem::Version
43
- version: '0.3'
44
- type: :runtime
45
- prerelease: false
46
- version_requirements: *79399600
45
+ version: '0.9'
47
46
  - !ruby/object:Gem::Dependency
48
47
  name: redis
49
- requirement: &79399290 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
50
49
  none: false
51
50
  requirements:
52
51
  - - ~>
@@ -54,10 +53,15 @@ dependencies:
54
53
  version: '2.2'
55
54
  type: :runtime
56
55
  prerelease: false
57
- version_requirements: *79399290
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.2'
58
62
  - !ruby/object:Gem::Dependency
59
63
  name: pdf-reader
60
- requirement: &79398890 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
61
65
  none: false
62
66
  requirements:
63
67
  - - ~>
@@ -65,10 +69,15 @@ dependencies:
65
69
  version: 1.0.0
66
70
  type: :runtime
67
71
  prerelease: false
68
- version_requirements: *79398890
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.0.0
69
78
  - !ruby/object:Gem::Dependency
70
79
  name: sinatra
71
- requirement: &79398560 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
72
81
  none: false
73
82
  requirements:
74
83
  - - ~>
@@ -76,10 +85,15 @@ dependencies:
76
85
  version: '1.3'
77
86
  type: :runtime
78
87
  prerelease: false
79
- version_requirements: *79398560
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: '1.3'
80
94
  - !ruby/object:Gem::Dependency
81
95
  name: redis_logger
82
- requirement: &79398070 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
83
97
  none: false
84
98
  requirements:
85
99
  - - ~>
@@ -87,13 +101,19 @@ dependencies:
87
101
  version: '0.1'
88
102
  type: :runtime
89
103
  prerelease: false
90
- version_requirements: *79398070
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '0.1'
91
110
  description: Code Zauker is based from ideas taken by old Google Code Search and uses
92
111
  Redis as a basic platform
93
112
  email:
94
113
  - jj@gioorgi.com
95
114
  executables:
96
115
  - czindexer
116
+ - czlist
97
117
  - czsearch
98
118
  - mczindexer
99
119
  - report.rb
@@ -108,6 +128,7 @@ files:
108
128
  - LICENSE.txt
109
129
  - Rakefile
110
130
  - bin/czindexer
131
+ - bin/czlist
111
132
  - bin/czsearch
112
133
  - bin/mczindexer
113
134
  - bin/report.rb
@@ -184,7 +205,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
205
  version: '0'
185
206
  requirements: []
186
207
  rubyforge_project: code_zauker
187
- rubygems_version: 1.8.11
208
+ rubygems_version: 1.8.24
188
209
  signing_key:
189
210
  specification_version: 3
190
211
  summary: A search engine for programming languages