code_zauker 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/doc/method_list.html CHANGED
@@ -8,13 +8,13 @@
8
8
 
9
9
  <link rel="stylesheet" href="css/common.css" type="text/css" media="screen" charset="utf-8" />
10
10
 
11
-
11
+
12
12
 
13
13
  <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
14
14
 
15
15
  <script type="text/javascript" charset="utf-8" src="js/full_list.js"></script>
16
16
 
17
-
17
+
18
18
  <base id="base_target" target="_parent" />
19
19
  </head>
20
20
  <body>
@@ -28,19 +28,33 @@
28
28
  <h1 id="full_list_header">Method List</h1>
29
29
  <div id="nav">
30
30
 
31
- <a target="_self" href="class_list.html">Classes</a>
31
+ <span><a target="_self" href="class_list.html">
32
+ Classes
33
+ </a></span>
32
34
 
33
- <a target="_self" href="method_list.html">Methods</a>
35
+ <span><a target="_self" href="method_list.html">
36
+ Methods
37
+ </a></span>
34
38
 
35
- <a target="_self" href="file_list.html">Files</a>
39
+ <span><a target="_self" href="file_list.html">
40
+ Files
41
+ </a></span>
36
42
 
37
43
  </div>
38
44
  <div id="search">Search: <input type="text" /></div>
39
45
 
40
- <ul id="full_list" class="methods">
46
+ <ul id="full_list" class="method">
41
47
 
42
48
 
43
49
  <li class="r1 ">
50
+ <span class='object_link'><a href="CodeZauker/IndexManager.html#check_repair-instance_method" title="CodeZauker::IndexManager#check_repair (method)">#check_repair</a></span>
51
+
52
+ <small>CodeZauker::IndexManager</small>
53
+
54
+ </li>
55
+
56
+
57
+ <li class="r2 ">
44
58
  <span class='object_link'><a href="CodeZauker/FileScanner.html#disconnect-instance_method" title="CodeZauker::FileScanner#disconnect (method)">#disconnect</a></span>
45
59
 
46
60
  <small>CodeZauker::FileScanner</small>
@@ -48,7 +62,23 @@
48
62
  </li>
49
63
 
50
64
 
65
+ <li class="r1 ">
66
+ <span class='object_link'><a href="CodeZauker/CliUtil.html#doWildSearch-instance_method" title="CodeZauker::CliUtil#doWildSearch (method)">#doWildSearch</a></span>
67
+
68
+ <small>CodeZauker::CliUtil</small>
69
+
70
+ </li>
71
+
72
+
51
73
  <li class="r2 ">
74
+ <span class='object_link'><a href="CodeZauker/CliUtil.html#do_report-instance_method" title="CodeZauker::CliUtil#do_report (method)">#do_report</a></span>
75
+
76
+ <small>CodeZauker::CliUtil</small>
77
+
78
+ </li>
79
+
80
+
81
+ <li class="r1 ">
52
82
  <span class='object_link'><a href="CodeZauker/Util.html#ensureUTF8-instance_method" title="CodeZauker::Util#ensureUTF8 (method)">#ensureUTF8</a></span>
53
83
 
54
84
  <small>CodeZauker::Util</small>
@@ -56,7 +86,7 @@
56
86
  </li>
57
87
 
58
88
 
59
- <li class="r1 ">
89
+ <li class="r2 ">
60
90
  <span class='object_link'><a href="CodeZauker/Util.html#get_lines-instance_method" title="CodeZauker::Util#get_lines (method)">#get_lines</a></span>
61
91
 
62
92
  <small>CodeZauker::Util</small>
@@ -64,7 +94,7 @@
64
94
  </li>
65
95
 
66
96
 
67
- <li class="r2 ">
97
+ <li class="r1 ">
68
98
  <span class='object_link'><a href="Grep.html#grep-instance_method" title="Grep#grep (method)">#grep</a></span>
69
99
 
70
100
  <small>Grep</small>
@@ -72,7 +102,7 @@
72
102
  </li>
73
103
 
74
104
 
75
- <li class="r1 ">
105
+ <li class="r2 ">
76
106
  <span class='object_link'><a href="CodeZauker/FileScanner.html#initialize-instance_method" title="CodeZauker::FileScanner#initialize (method)">#initialize</a></span>
77
107
 
78
108
  <small>CodeZauker::FileScanner</small>
@@ -80,6 +110,14 @@
80
110
  </li>
81
111
 
82
112
 
113
+ <li class="r1 ">
114
+ <span class='object_link'><a href="CodeZauker/IndexManager.html#initialize-instance_method" title="CodeZauker::IndexManager#initialize (method)">#initialize</a></span>
115
+
116
+ <small>CodeZauker::IndexManager</small>
117
+
118
+ </li>
119
+
120
+
83
121
  <li class="r2 ">
84
122
  <span class='object_link'><a href="CodeZauker/Util.html#is_pdf%3F-instance_method" title="CodeZauker::Util#is_pdf? (method)">#is_pdf?</a></span>
85
123
 
@@ -121,6 +159,14 @@
121
159
 
122
160
 
123
161
  <li class="r1 ">
162
+ <span class='object_link'><a href="CodeZauker/CliUtil.html#parse_host_options-instance_method" title="CodeZauker::CliUtil#parse_host_options (method)">#parse_host_options</a></span>
163
+
164
+ <small>CodeZauker::CliUtil</small>
165
+
166
+ </li>
167
+
168
+
169
+ <li class="r2 ">
124
170
  <span class='object_link'><a href="CodeZauker/FileScanner.html#reindex-instance_method" title="CodeZauker::FileScanner#reindex (method)">#reindex</a></span>
125
171
 
126
172
  <small>CodeZauker::FileScanner</small>
@@ -128,7 +174,7 @@
128
174
  </li>
129
175
 
130
176
 
131
- <li class="r2 ">
177
+ <li class="r1 ">
132
178
  <span class='object_link'><a href="CodeZauker/FileScanner.html#remove-instance_method" title="CodeZauker::FileScanner#remove (method)">#remove</a></span>
133
179
 
134
180
  <small>CodeZauker::FileScanner</small>
@@ -136,7 +182,7 @@
136
182
  </li>
137
183
 
138
184
 
139
- <li class="r1 ">
185
+ <li class="r2 ">
140
186
  <span class='object_link'><a href="CodeZauker/FileScanner.html#removeAll-instance_method" title="CodeZauker::FileScanner#removeAll (method)">#removeAll</a></span>
141
187
 
142
188
  <small>CodeZauker::FileScanner</small>
@@ -144,7 +190,7 @@
144
190
  </li>
145
191
 
146
192
 
147
- <li class="r2 ">
193
+ <li class="r1 ">
148
194
  <span class='object_link'><a href="CodeZauker/FileScanner.html#search-instance_method" title="CodeZauker::FileScanner#search (method)">#search</a></span>
149
195
 
150
196
  <small>CodeZauker::FileScanner</small>
@@ -152,6 +198,14 @@
152
198
  </li>
153
199
 
154
200
 
201
+ <li class="r2 ">
202
+ <span class='object_link'><a href="CodeZauker/FileScanner.html#wsearch-instance_method" title="CodeZauker::FileScanner#wsearch (method)">#wsearch</a></span>
203
+
204
+ <small>CodeZauker::FileScanner</small>
205
+
206
+ </li>
207
+
208
+
155
209
  </ul>
156
210
  </div>
157
211
  </body>
@@ -6,7 +6,7 @@
6
6
  <title>
7
7
  Top Level Namespace
8
8
 
9
- &mdash; Code Zauker 0.0.5 Documentation
9
+ &mdash; Code Zauker 0.0.8 Documentation
10
10
 
11
11
  </title>
12
12
 
@@ -15,10 +15,12 @@
15
15
  <link rel="stylesheet" href="css/common.css" type="text/css" media="screen" charset="utf-8" />
16
16
 
17
17
  <script type="text/javascript" charset="utf-8">
18
+ hasFrames = window.top.frames.main ? true : false;
18
19
  relpath = '';
19
- if (relpath != '') relpath += '/';
20
+ framesUrl = "frames.html#!" + escape(window.location.href);
20
21
  </script>
21
22
 
23
+
22
24
  <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
23
25
 
24
26
  <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
@@ -26,36 +28,41 @@
26
28
 
27
29
  </head>
28
30
  <body>
29
- <script type="text/javascript" charset="utf-8">
30
- if (window.top.frames.main) document.body.className = 'frames';
31
- </script>
32
-
33
31
  <div id="header">
34
32
  <div id="menu">
35
33
 
36
- <a href="_index.html">Index</a> &raquo;
34
+ <a href="_index.html">Index</a> &raquo;
37
35
 
38
36
 
39
37
  <span class="title">Top Level Namespace</span>
40
38
 
41
-
39
+
42
40
  <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
43
41
  </div>
44
42
 
45
43
  <div id="search">
46
44
 
47
- <a id="class_list_link" href="#">Class List</a>
45
+ <a class="full_list_link" id="class_list_link"
46
+ href="class_list.html">
47
+ Class List
48
+ </a>
48
49
 
49
- <a id="method_list_link" href="#">Method List</a>
50
+ <a class="full_list_link" id="method_list_link"
51
+ href="method_list.html">
52
+ Method List
53
+ </a>
50
54
 
51
- <a id="file_list_link" href="#">File List</a>
55
+ <a class="full_list_link" id="file_list_link"
56
+ href="file_list.html">
57
+ File List
58
+ </a>
52
59
 
53
60
  </div>
54
61
  <div class="clear"></div>
55
62
  </div>
56
-
63
+
57
64
  <iframe id="search_frame"></iframe>
58
-
65
+
59
66
  <div id="content"><h1>Top Level Namespace
60
67
 
61
68
 
@@ -68,6 +75,10 @@
68
75
 
69
76
 
70
77
 
78
+ <dt class="r1">Includes:</dt>
79
+ <dd class="r1"><span class='object_link'><a href="Grep.html" title="Grep (module)">Grep</a></span></dd>
80
+
81
+
71
82
 
72
83
 
73
84
 
@@ -76,11 +87,11 @@
76
87
 
77
88
  <h2>Defined Under Namespace</h2>
78
89
  <p class="children">
79
-
90
+
80
91
 
81
92
  <strong class="modules">Modules:</strong> <span class='object_link'><a href="CodeZauker.html" title="CodeZauker (module)">CodeZauker</a></span>, <span class='object_link'><a href="Grep.html" title="Grep (module)">Grep</a></span>
82
93
 
83
-
94
+
84
95
 
85
96
 
86
97
  </p>
@@ -92,13 +103,24 @@
92
103
 
93
104
 
94
105
 
106
+
107
+
108
+
109
+
110
+
111
+
112
+ <h2>Method Summary</h2>
113
+
114
+ <h3 class="inherited">Methods included from <span class='object_link'><a href="Grep.html" title="Grep (module)">Grep</a></span></h3>
115
+ <p class="inherited"><span class='object_link'><a href="Grep.html#grep-instance_method" title="Grep#grep (method)">#grep</a></span></p>
116
+
95
117
 
96
118
  </div>
97
-
119
+
98
120
  <div id="footer">
99
- Generated on Mon Apr 9 16:40:14 2012 by
121
+ Generated on Wed May 16 17:14:53 2012 by
100
122
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
101
- 0.7.5 (ruby-1.9.3).
123
+ 0.8.1 (ruby-1.9.3).
102
124
  </div>
103
125
 
104
126
  </body>
data/etc/redis.conf CHANGED
@@ -84,11 +84,11 @@ databases 16
84
84
  #
85
85
  # Note: you can disable saving entirely by commenting out all the "save" lines.
86
86
 
87
- save 900 1
88
- save 300 10
87
+ save 900 100
88
+ save 300 1000
89
89
  # Incresed minute saver, to improve performance
90
90
  #save 60 10000
91
- save 60 600000
91
+ save 60 60000000
92
92
 
93
93
  # Compress string objects using LZF when dump .rdb databases?
94
94
  # For default that's set to 'yes' as it's almost always a win.
@@ -1,7 +1,22 @@
1
1
  module CodeZauker
2
2
 
3
3
  class CliUtil
4
-
4
+
5
+ # Create a regexp to do a case insensitive wild search used by grep
6
+ def doWildSearch(term,fileScanner)
7
+ fileGroup=fileScanner.wsearch(term)
8
+ # Make a simple regexp from the wild stuff...
9
+ finalRegexp=""
10
+ term.split("*").each do |term|
11
+ finalRegexp= finalRegexp+Regexp.escape(term)+".*"
12
+ end
13
+ return {
14
+ :regexp=>/#{finalRegexp}/i,
15
+ :files => fileGroup
16
+ }
17
+
18
+ end
19
+
5
20
  def parse_host_options(connection_string)
6
21
  #puts "Parsing... #{connection_string}"
7
22
  options={}
@@ -13,6 +13,12 @@ module CodeZauker
13
13
  ".zip",".7z","rar",
14
14
  # MS Office zip-like files...
15
15
  ".pptx",".docx",".xlsx",
16
+ # MS Visual Studio big bad files
17
+ ".scc",".datasource",".pdb","vspscc",".settings",
18
+ #"Telerik.Web.UI.xml",
19
+ ".Web.UI.xml",
20
+ # Auto-generated stuff...is suggested to be avoided
21
+ ".designer.cs",
16
22
  # Avoid slurping text document too...
17
23
  ".doc",
18
24
  ".ppt",".xls",".rtf",".vsd", ".odf",
@@ -27,7 +33,7 @@ module CodeZauker
27
33
  ".tar",
28
34
  ".gz",".Z",
29
35
  ".dropbox",
30
- ".svn-base",".pdb",".cache",
36
+ ".svn-base",".cache",
31
37
  #IDE STUFF
32
38
  ".wlwLock",
33
39
  # Music exclusion
@@ -1,3 +1,4 @@
1
1
  module CodeZauker
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
+ DB_VERSION = 1
3
4
  end
data/lib/code_zauker.rb CHANGED
@@ -1,10 +1,13 @@
1
1
  # -*- mode:ruby ; -*- -*
2
2
  require "code_zauker/version"
3
3
  require "code_zauker/constants"
4
+ require 'code_zauker/grep'
4
5
  require 'redis/connection/hiredis'
5
6
  require 'redis'
6
7
  require 'set'
7
8
  require 'pdf/reader'
9
+ require 'date'
10
+
8
11
  # This module implements a simple reverse indexer
9
12
  # based on Redis
10
13
  # The idea is inspired by http://swtch.com/~rsc/regexp/regexp4.html
@@ -102,8 +105,47 @@ module CodeZauker
102
105
  end
103
106
  return lines
104
107
  end
108
+ end
105
109
 
106
-
110
+ # Manage the index and keep it well organized
111
+ class IndexManager
112
+ def initialize(redisConnection=nil)
113
+ if redisConnection==nil
114
+ @redis=Redis.new
115
+ else
116
+ @redis=redisConnection
117
+ end
118
+ end
119
+
120
+ def check_repair
121
+
122
+ puts "Staring index check"
123
+ dbversion=@redis.hget("codezauker","db_version")
124
+ if dbversion==nil
125
+ puts "DB Version <=0.7"
126
+ @redis.hset("codezauker","db_version",CodeZauker::DB_VERSION)
127
+ # no other checks to do right now
128
+ else
129
+ if dbversion.to_i() > CodeZauker::DB_VERSION
130
+ raise "DB Version #{dbversion} is greater than my #{CodeZauker::DB_VERSION}"
131
+ else
132
+ puts "Migrating from #{dbversion} to #{CodeZauker::DB_VERSION}"
133
+ # Nothing to do right now
134
+ end
135
+ end
136
+ puts "Summary....."
137
+ dbversion=@redis.hget("codezauker","db_version")
138
+ last_check=@redis.hget("codezauker","last_check")
139
+ puts "DB Version: #{dbversion}"
140
+ puts "Last Check: #{last_check}"
141
+ puts "Checking...."
142
+ @redis.hset("codezauker","last_check",DateTime.now().to_s())
143
+ puts "Issuing save..."
144
+ @redis.save()
145
+ puts "Save successful"
146
+ @redis.quit()
147
+ puts "Disconnected from redis"
148
+ end
107
149
  end
108
150
 
109
151
  # Scan a file and push it inside redis...
@@ -119,7 +161,12 @@ module CodeZauker
119
161
 
120
162
 
121
163
  def disconnect()
122
- @redis.quit
164
+ begin
165
+ @redis.quit
166
+ rescue Errno::EAGAIN =>e
167
+ # Nothing to do...
168
+ puts "Ignored EAGAIN ERROR during disconnect..."
169
+ end
123
170
  end
124
171
 
125
172
 
@@ -251,7 +298,7 @@ module CodeZauker
251
298
  trigramInAnd=Set.new()
252
299
  # Search=> Sea AND ear AND arc AND rch
253
300
  for j in 0...term.length
254
- currentTrigram=term[j,GRAM_SIZE]
301
+ currentTrigram=term[j,GRAM_SIZE]
255
302
  if currentTrigram.length <GRAM_SIZE
256
303
  # We are at the end...
257
304
  break
@@ -289,6 +336,41 @@ module CodeZauker
289
336
  return map_ids_to_files(fileIds)
290
337
  end
291
338
 
339
+ # = wild cards search
340
+ # You can search trigram in the form
341
+ # public*class*Apple
342
+ # will match java declaration of MyApple but not
343
+ # YourAppManager
344
+ def wsearch(term)
345
+ # Split stuff
346
+ puts "Wild Search request:#{term}"
347
+ m=term.split("*")
348
+ if m.length>0
349
+ trigramInAnd=Set.new()
350
+ puts "*= Found:#{m.length}"
351
+ m.each do | wtc |
352
+ wt=wtc.downcase()
353
+ #puts "Splitting #{wt}"
354
+ trigSet=split_in_trigrams(wt,"trigram:ci")
355
+ trigramInAnd=trigramInAnd.merge(trigSet)
356
+ end
357
+ # puts "Trigrams: #{trigramInAnd.length}"
358
+ # trigramInAnd.each do | x |
359
+ # puts "#{x}"
360
+ # end
361
+ if trigramInAnd.length==0
362
+ return []
363
+ end
364
+ fileIds=@redis.sinter(*trigramInAnd)
365
+ fileNames=map_ids_to_files(fileIds)
366
+ #puts "DEBUG #{fileIds} #{fileNames}"
367
+ return fileNames
368
+ else
369
+ puts "Warn no Wild!"
370
+ return search(term)
371
+ end
372
+ end
373
+
292
374
 
293
375
  # = search
294
376
  # Find a list of file candidates to a search string
data/readme.org CHANGED
@@ -52,7 +52,9 @@ It is still beta, but it is *very* fast, thank to redis!
52
52
  ** Parallel execution
53
53
  If you want to speed up indexing, you can use the mczindexer command.
54
54
  For instance:
55
+ #+BEGIN_SRC sh
55
56
  mczindexer eclipse-sources/
57
+ #+END_SRC
56
58
  will fire at most 10 parallel czindexer.
57
59
 
58
60
  ** Simple stats
@@ -64,29 +66,24 @@ Run
64
66
  and enjoy!
65
67
 
66
68
 
69
+ * DB Version
70
+ Starting from version 0.0.8, a new index check option on czindexer
71
+ will be able to migrate database between release,
72
+
73
+
74
+
67
75
  * Release History
68
- | Version | Date | Summary |
69
- |---------+-------------+-----------------------------------------------------------------|
70
- | 0.0.7 | 13 May 2012 | Better documentation, mczindexer, new report command
71
- | 0.0.6 | 04 May 2012 | New redis-server option. Better web search with results hilight |
72
- | 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootrstrap css |
73
- | 0.0.4 | 12 Feb 2012 | PDF Searching |
74
- | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
75
- | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
76
- | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
77
- | | | |
78
- | | | |
79
-
80
- * Aws tests
81
- ** Micro instance
82
- Without multiplexing you get
83
- real 4m39.599s
84
- for indexing code_zauker
85
-
86
- With
87
- time find . -type f -print0 | xargs -0 -P 10 -n 20 ./bin/czindexer -v --redis-server awsserver
88
- You get about
89
- real 0m31.284s
76
+ | Version | Date | Summary |
77
+ |---------+-------------+------------------------------------------------------------------------|
78
+ | 0.0.8 | 04 Jun 2012 | Wildcard (*) search/better error handling of missed files/indexchecker |
79
+ | 0.0.7 | 13 May 2012 | Better documentation, mczindexer, new report command |
80
+ | 0.0.6 | 04 May 2012 | New redis-server option. Better web search with results hilight |
81
+ | 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootrstrap css |
82
+ | 0.0.4 | 12 Feb 2012 | PDF Searching |
83
+ | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
84
+ | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
85
+ | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
86
+
90
87
 
91
88
 
92
89
  * DEVELOPING
@@ -0,0 +1 @@
1
+ Wild Wild West Movie
@@ -0,0 +1,39 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # To test use
3
+ # rake TEST=test/test_wild_search.rb
4
+ require 'test/unit'
5
+ require 'code_zauker'
6
+
7
+ # See ri Test::Unit::Assertions
8
+ # for assertion documentation
9
+ class FileScannerBasicSearch < Test::Unit::TestCase
10
+ # This test can search very unique things...
11
+ def test_foolish_wild1
12
+ fs=CodeZauker::FileScanner.new()
13
+ fs.load("./test/fixture/wildtest.txt")
14
+ files=fs.wsearch("Wild*West")
15
+ assert(files.include?("./test/fixture/wildtest.txt")== true,
16
+ "Expected file not found. Files found:#{files}")
17
+
18
+ end
19
+
20
+ def test_foolish_wild2
21
+ fs=CodeZauker::FileScanner.new()
22
+ fs.load("./test/fixture/wildtest.txt")
23
+ files=fs.wsearch("Wild*West*Movie")
24
+ assert(files.include?("./test/fixture/wildtest.txt")== true,
25
+ "Expected file not found. Files found:#{files}")
26
+ end
27
+
28
+ # Also unordered match will work
29
+ # So the negative match is difficult
30
+ def test_foolish_wild3
31
+ fs=CodeZauker::FileScanner.new()
32
+ fs.load("./test/fixture/wildtest.txt")
33
+ files=fs.wsearch("West*Wild*NotOnTheSameLineForSure")
34
+ assert(files.include?("./test/fixture/wildtest.txt")== false,
35
+ "Expected not matching wildtest.txt file. Matches:#{files}")
36
+ end
37
+
38
+
39
+ end