code_zauker 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/doc/method_list.html CHANGED
@@ -8,13 +8,13 @@
8
8
 
9
9
  <link rel="stylesheet" href="css/common.css" type="text/css" media="screen" charset="utf-8" />
10
10
 
11
-
11
+
12
12
 
13
13
  <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
14
14
 
15
15
  <script type="text/javascript" charset="utf-8" src="js/full_list.js"></script>
16
16
 
17
-
17
+
18
18
  <base id="base_target" target="_parent" />
19
19
  </head>
20
20
  <body>
@@ -28,19 +28,33 @@
28
28
  <h1 id="full_list_header">Method List</h1>
29
29
  <div id="nav">
30
30
 
31
- <a target="_self" href="class_list.html">Classes</a>
31
+ <span><a target="_self" href="class_list.html">
32
+ Classes
33
+ </a></span>
32
34
 
33
- <a target="_self" href="method_list.html">Methods</a>
35
+ <span><a target="_self" href="method_list.html">
36
+ Methods
37
+ </a></span>
34
38
 
35
- <a target="_self" href="file_list.html">Files</a>
39
+ <span><a target="_self" href="file_list.html">
40
+ Files
41
+ </a></span>
36
42
 
37
43
  </div>
38
44
  <div id="search">Search: <input type="text" /></div>
39
45
 
40
- <ul id="full_list" class="methods">
46
+ <ul id="full_list" class="method">
41
47
 
42
48
 
43
49
  <li class="r1 ">
50
+ <span class='object_link'><a href="CodeZauker/IndexManager.html#check_repair-instance_method" title="CodeZauker::IndexManager#check_repair (method)">#check_repair</a></span>
51
+
52
+ <small>CodeZauker::IndexManager</small>
53
+
54
+ </li>
55
+
56
+
57
+ <li class="r2 ">
44
58
  <span class='object_link'><a href="CodeZauker/FileScanner.html#disconnect-instance_method" title="CodeZauker::FileScanner#disconnect (method)">#disconnect</a></span>
45
59
 
46
60
  <small>CodeZauker::FileScanner</small>
@@ -48,7 +62,23 @@
48
62
  </li>
49
63
 
50
64
 
65
+ <li class="r1 ">
66
+ <span class='object_link'><a href="CodeZauker/CliUtil.html#doWildSearch-instance_method" title="CodeZauker::CliUtil#doWildSearch (method)">#doWildSearch</a></span>
67
+
68
+ <small>CodeZauker::CliUtil</small>
69
+
70
+ </li>
71
+
72
+
51
73
  <li class="r2 ">
74
+ <span class='object_link'><a href="CodeZauker/CliUtil.html#do_report-instance_method" title="CodeZauker::CliUtil#do_report (method)">#do_report</a></span>
75
+
76
+ <small>CodeZauker::CliUtil</small>
77
+
78
+ </li>
79
+
80
+
81
+ <li class="r1 ">
52
82
  <span class='object_link'><a href="CodeZauker/Util.html#ensureUTF8-instance_method" title="CodeZauker::Util#ensureUTF8 (method)">#ensureUTF8</a></span>
53
83
 
54
84
  <small>CodeZauker::Util</small>
@@ -56,7 +86,7 @@
56
86
  </li>
57
87
 
58
88
 
59
- <li class="r1 ">
89
+ <li class="r2 ">
60
90
  <span class='object_link'><a href="CodeZauker/Util.html#get_lines-instance_method" title="CodeZauker::Util#get_lines (method)">#get_lines</a></span>
61
91
 
62
92
  <small>CodeZauker::Util</small>
@@ -64,7 +94,7 @@
64
94
  </li>
65
95
 
66
96
 
67
- <li class="r2 ">
97
+ <li class="r1 ">
68
98
  <span class='object_link'><a href="Grep.html#grep-instance_method" title="Grep#grep (method)">#grep</a></span>
69
99
 
70
100
  <small>Grep</small>
@@ -72,7 +102,7 @@
72
102
  </li>
73
103
 
74
104
 
75
- <li class="r1 ">
105
+ <li class="r2 ">
76
106
  <span class='object_link'><a href="CodeZauker/FileScanner.html#initialize-instance_method" title="CodeZauker::FileScanner#initialize (method)">#initialize</a></span>
77
107
 
78
108
  <small>CodeZauker::FileScanner</small>
@@ -80,6 +110,14 @@
80
110
  </li>
81
111
 
82
112
 
113
+ <li class="r1 ">
114
+ <span class='object_link'><a href="CodeZauker/IndexManager.html#initialize-instance_method" title="CodeZauker::IndexManager#initialize (method)">#initialize</a></span>
115
+
116
+ <small>CodeZauker::IndexManager</small>
117
+
118
+ </li>
119
+
120
+
83
121
  <li class="r2 ">
84
122
  <span class='object_link'><a href="CodeZauker/Util.html#is_pdf%3F-instance_method" title="CodeZauker::Util#is_pdf? (method)">#is_pdf?</a></span>
85
123
 
@@ -121,6 +159,14 @@
121
159
 
122
160
 
123
161
  <li class="r1 ">
162
+ <span class='object_link'><a href="CodeZauker/CliUtil.html#parse_host_options-instance_method" title="CodeZauker::CliUtil#parse_host_options (method)">#parse_host_options</a></span>
163
+
164
+ <small>CodeZauker::CliUtil</small>
165
+
166
+ </li>
167
+
168
+
169
+ <li class="r2 ">
124
170
  <span class='object_link'><a href="CodeZauker/FileScanner.html#reindex-instance_method" title="CodeZauker::FileScanner#reindex (method)">#reindex</a></span>
125
171
 
126
172
  <small>CodeZauker::FileScanner</small>
@@ -128,7 +174,7 @@
128
174
  </li>
129
175
 
130
176
 
131
- <li class="r2 ">
177
+ <li class="r1 ">
132
178
  <span class='object_link'><a href="CodeZauker/FileScanner.html#remove-instance_method" title="CodeZauker::FileScanner#remove (method)">#remove</a></span>
133
179
 
134
180
  <small>CodeZauker::FileScanner</small>
@@ -136,7 +182,7 @@
136
182
  </li>
137
183
 
138
184
 
139
- <li class="r1 ">
185
+ <li class="r2 ">
140
186
  <span class='object_link'><a href="CodeZauker/FileScanner.html#removeAll-instance_method" title="CodeZauker::FileScanner#removeAll (method)">#removeAll</a></span>
141
187
 
142
188
  <small>CodeZauker::FileScanner</small>
@@ -144,7 +190,7 @@
144
190
  </li>
145
191
 
146
192
 
147
- <li class="r2 ">
193
+ <li class="r1 ">
148
194
  <span class='object_link'><a href="CodeZauker/FileScanner.html#search-instance_method" title="CodeZauker::FileScanner#search (method)">#search</a></span>
149
195
 
150
196
  <small>CodeZauker::FileScanner</small>
@@ -152,6 +198,14 @@
152
198
  </li>
153
199
 
154
200
 
201
+ <li class="r2 ">
202
+ <span class='object_link'><a href="CodeZauker/FileScanner.html#wsearch-instance_method" title="CodeZauker::FileScanner#wsearch (method)">#wsearch</a></span>
203
+
204
+ <small>CodeZauker::FileScanner</small>
205
+
206
+ </li>
207
+
208
+
155
209
  </ul>
156
210
  </div>
157
211
  </body>
@@ -6,7 +6,7 @@
6
6
  <title>
7
7
  Top Level Namespace
8
8
 
9
- &mdash; Code Zauker 0.0.5 Documentation
9
+ &mdash; Code Zauker 0.0.8 Documentation
10
10
 
11
11
  </title>
12
12
 
@@ -15,10 +15,12 @@
15
15
  <link rel="stylesheet" href="css/common.css" type="text/css" media="screen" charset="utf-8" />
16
16
 
17
17
  <script type="text/javascript" charset="utf-8">
18
+ hasFrames = window.top.frames.main ? true : false;
18
19
  relpath = '';
19
- if (relpath != '') relpath += '/';
20
+ framesUrl = "frames.html#!" + escape(window.location.href);
20
21
  </script>
21
22
 
23
+
22
24
  <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
23
25
 
24
26
  <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
@@ -26,36 +28,41 @@
26
28
 
27
29
  </head>
28
30
  <body>
29
- <script type="text/javascript" charset="utf-8">
30
- if (window.top.frames.main) document.body.className = 'frames';
31
- </script>
32
-
33
31
  <div id="header">
34
32
  <div id="menu">
35
33
 
36
- <a href="_index.html">Index</a> &raquo;
34
+ <a href="_index.html">Index</a> &raquo;
37
35
 
38
36
 
39
37
  <span class="title">Top Level Namespace</span>
40
38
 
41
-
39
+
42
40
  <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
43
41
  </div>
44
42
 
45
43
  <div id="search">
46
44
 
47
- <a id="class_list_link" href="#">Class List</a>
45
+ <a class="full_list_link" id="class_list_link"
46
+ href="class_list.html">
47
+ Class List
48
+ </a>
48
49
 
49
- <a id="method_list_link" href="#">Method List</a>
50
+ <a class="full_list_link" id="method_list_link"
51
+ href="method_list.html">
52
+ Method List
53
+ </a>
50
54
 
51
- <a id="file_list_link" href="#">File List</a>
55
+ <a class="full_list_link" id="file_list_link"
56
+ href="file_list.html">
57
+ File List
58
+ </a>
52
59
 
53
60
  </div>
54
61
  <div class="clear"></div>
55
62
  </div>
56
-
63
+
57
64
  <iframe id="search_frame"></iframe>
58
-
65
+
59
66
  <div id="content"><h1>Top Level Namespace
60
67
 
61
68
 
@@ -68,6 +75,10 @@
68
75
 
69
76
 
70
77
 
78
+ <dt class="r1">Includes:</dt>
79
+ <dd class="r1"><span class='object_link'><a href="Grep.html" title="Grep (module)">Grep</a></span></dd>
80
+
81
+
71
82
 
72
83
 
73
84
 
@@ -76,11 +87,11 @@
76
87
 
77
88
  <h2>Defined Under Namespace</h2>
78
89
  <p class="children">
79
-
90
+
80
91
 
81
92
  <strong class="modules">Modules:</strong> <span class='object_link'><a href="CodeZauker.html" title="CodeZauker (module)">CodeZauker</a></span>, <span class='object_link'><a href="Grep.html" title="Grep (module)">Grep</a></span>
82
93
 
83
-
94
+
84
95
 
85
96
 
86
97
  </p>
@@ -92,13 +103,24 @@
92
103
 
93
104
 
94
105
 
106
+
107
+
108
+
109
+
110
+
111
+
112
+ <h2>Method Summary</h2>
113
+
114
+ <h3 class="inherited">Methods included from <span class='object_link'><a href="Grep.html" title="Grep (module)">Grep</a></span></h3>
115
+ <p class="inherited"><span class='object_link'><a href="Grep.html#grep-instance_method" title="Grep#grep (method)">#grep</a></span></p>
116
+
95
117
 
96
118
  </div>
97
-
119
+
98
120
  <div id="footer">
99
- Generated on Mon Apr 9 16:40:14 2012 by
121
+ Generated on Wed May 16 17:14:53 2012 by
100
122
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
101
- 0.7.5 (ruby-1.9.3).
123
+ 0.8.1 (ruby-1.9.3).
102
124
  </div>
103
125
 
104
126
  </body>
data/etc/redis.conf CHANGED
@@ -84,11 +84,11 @@ databases 16
84
84
  #
85
85
  # Note: you can disable saving at all commenting all the "save" lines.
86
86
 
87
- save 900 1
88
- save 300 10
87
+ save 900 100
88
+ save 300 1000
89
89
  # Increased minute saver, to improve performance
90
90
  #save 60 10000
91
- save 60 600000
91
+ save 60 60000000
92
92
 
93
93
  # Compress string objects using LZF when dump .rdb databases?
94
94
  # For default that's set to 'yes' as it's almost always a win.
@@ -1,7 +1,22 @@
1
1
  module CodeZauker
2
2
 
3
3
  class CliUtil
4
-
4
+
5
+ # Create a regexp to do a case insensitive wild search used by grep
6
+ def doWildSearch(term,fileScanner)
7
+ fileGroup=fileScanner.wsearch(term)
8
+ # Make a simple regexp from the wild stuff...
9
+ finalRegexp=""
10
+ term.split("*").each do |term|
11
+ finalRegexp= finalRegexp+Regexp.escape(term)+".*"
12
+ end
13
+ return {
14
+ :regexp=>/#{finalRegexp}/i,
15
+ :files => fileGroup
16
+ }
17
+
18
+ end
19
+
5
20
  def parse_host_options(connection_string)
6
21
  #puts "Parsing... #{connection_string}"
7
22
  options={}
@@ -13,6 +13,12 @@ module CodeZauker
13
13
  ".zip",".7z","rar",
14
14
  # MS Office zip-like files...
15
15
  ".pptx",".docx",".xlsx",
16
+ # MS Visual Studio big bad files
17
+ ".scc",".datasource",".pdb","vspscc",".settings",
18
+ #"Telerik.Web.UI.xml",
19
+ ".Web.UI.xml",
20
+ # Auto-generated stuff... indexing it is best avoided
21
+ ".designer.cs",
16
22
  # Avoid slurping text document too...
17
23
  ".doc",
18
24
  ".ppt",".xls",".rtf",".vsd", ".odf",
@@ -27,7 +33,7 @@ module CodeZauker
27
33
  ".tar",
28
34
  ".gz",".Z",
29
35
  ".dropbox",
30
- ".svn-base",".pdb",".cache",
36
+ ".svn-base",".cache",
31
37
  #IDE STUFF
32
38
  ".wlwLock",
33
39
  # Music exclusion
@@ -1,3 +1,4 @@
1
1
  module CodeZauker
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
+ DB_VERSION = 1
3
4
  end
data/lib/code_zauker.rb CHANGED
@@ -1,10 +1,13 @@
1
1
  # -*- mode:ruby ; -*- -*
2
2
  require "code_zauker/version"
3
3
  require "code_zauker/constants"
4
+ require 'code_zauker/grep'
4
5
  require 'redis/connection/hiredis'
5
6
  require 'redis'
6
7
  require 'set'
7
8
  require 'pdf/reader'
9
+ require 'date'
10
+
8
11
  # This module implements a simple reverse indexer
9
12
  # based on Redis
10
13
  # The idea is inspired by http://swtch.com/~rsc/regexp/regexp4.html
@@ -102,8 +105,47 @@ module CodeZauker
102
105
  end
103
106
  return lines
104
107
  end
108
+ end
105
109
 
106
-
110
+ # Manage the index and keep it well organized
111
+ class IndexManager
112
+ def initialize(redisConnection=nil)
113
+ if redisConnection==nil
114
+ @redis=Redis.new
115
+ else
116
+ @redis=redisConnection
117
+ end
118
+ end
119
+
120
+ def check_repair
121
+
122
+ puts "Staring index check"
123
+ dbversion=@redis.hget("codezauker","db_version")
124
+ if dbversion==nil
125
+ puts "DB Version <=0.7"
126
+ @redis.hset("codezauker","db_version",CodeZauker::DB_VERSION)
127
+ # no other checks to do right now
128
+ else
129
+ if dbversion.to_i() > CodeZauker::DB_VERSION
130
+ raise "DB Version #{dbversion} is greater than my #{CodeZauker::DB_VERSION}"
131
+ else
132
+ puts "Migrating from #{dbversion} to #{CodeZauker::DB_VERSION}"
133
+ # Nothing to do right now
134
+ end
135
+ end
136
+ puts "Summary....."
137
+ dbversion=@redis.hget("codezauker","db_version")
138
+ last_check=@redis.hget("codezauker","last_check")
139
+ puts "DB Version: #{dbversion}"
140
+ puts "Last Check: #{last_check}"
141
+ puts "Checking...."
142
+ @redis.hset("codezauker","last_check",DateTime.now().to_s())
143
+ puts "Issuing save..."
144
+ @redis.save()
145
+ puts "Save successful"
146
+ @redis.quit()
147
+ puts "Disconnected from redis"
148
+ end
107
149
  end
108
150
 
109
151
  # Scan a file and push it inside redis...
@@ -119,7 +161,12 @@ module CodeZauker
119
161
 
120
162
 
121
163
  def disconnect()
122
- @redis.quit
164
+ begin
165
+ @redis.quit
166
+ rescue Errno::EAGAIN =>e
167
+ # Nothing to do...
168
+ puts "Ignored EAGAIN ERROR during disconnect..."
169
+ end
123
170
  end
124
171
 
125
172
 
@@ -251,7 +298,7 @@ module CodeZauker
251
298
  trigramInAnd=Set.new()
252
299
  # Search=> Sea AND ear AND arc AND rch
253
300
  for j in 0...term.length
254
- currentTrigram=term[j,GRAM_SIZE]
301
+ currentTrigram=term[j,GRAM_SIZE]
255
302
  if currentTrigram.length <GRAM_SIZE
256
303
  # We are at the end...
257
304
  break
@@ -289,6 +336,41 @@ module CodeZauker
289
336
  return map_ids_to_files(fileIds)
290
337
  end
291
338
 
339
+ # = wild cards search
340
+ # You can search trigram in the form
341
+ # public*class*Apple
342
+ # will match java declaration of MyApple but not
343
+ # YourAppManager
344
+ def wsearch(term)
345
+ # Split stuff
346
+ puts "Wild Search request:#{term}"
347
+ m=term.split("*")
348
+ if m.length>0
349
+ trigramInAnd=Set.new()
350
+ puts "*= Found:#{m.length}"
351
+ m.each do | wtc |
352
+ wt=wtc.downcase()
353
+ #puts "Splitting #{wt}"
354
+ trigSet=split_in_trigrams(wt,"trigram:ci")
355
+ trigramInAnd=trigramInAnd.merge(trigSet)
356
+ end
357
+ # puts "Trigrams: #{trigramInAnd.length}"
358
+ # trigramInAnd.each do | x |
359
+ # puts "#{x}"
360
+ # end
361
+ if trigramInAnd.length==0
362
+ return []
363
+ end
364
+ fileIds=@redis.sinter(*trigramInAnd)
365
+ fileNames=map_ids_to_files(fileIds)
366
+ #puts "DEBUG #{fileIds} #{fileNames}"
367
+ return fileNames
368
+ else
369
+ puts "Warn no Wild!"
370
+ return search(term)
371
+ end
372
+ end
373
+
292
374
 
293
375
  # = search
294
376
  # Find a list of file candidates to a search string
data/readme.org CHANGED
@@ -52,7 +52,9 @@ It is still beta, but it is *very* fast, thank to redis!
52
52
  ** Parallel execution
53
53
  If you want to speed up indexing, you can use the mczindexer command.
54
54
  For instance:
55
+ #+BEGIN_SRC sh
55
56
  mczindexer eclipse-sources/
57
+ #+END_SRC
56
58
  will fire at most 10 parallel czindexer.
57
59
 
58
60
  ** Simple stats
@@ -64,29 +66,24 @@ Run
64
66
  and enjoy!
65
67
 
66
68
 
69
+ * DB Version
70
+ Starting from version 0.0.8, a new index check option on czindexer
71
+ will be able to migrate database between release,
72
+
73
+
74
+
67
75
  * Release History
68
- | Version | Date | Summary |
69
- |---------+-------------+-----------------------------------------------------------------|
70
- | 0.0.7 | 13 May 2012 | Better documentation, mczindexer, new report command
71
- | 0.0.6 | 04 May 2012 | New redis-server option. Better web search with results hilight |
72
- | 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootrstrap css |
73
- | 0.0.4 | 12 Feb 2012 | PDF Searching |
74
- | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
75
- | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
76
- | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
77
- | | | |
78
- | | | |
79
-
80
- * Aws tests
81
- ** Micro instance
82
- Without multiplexing you get
83
- real 4m39.599s
84
- for indexing code_zauker
85
-
86
- With
87
- time find . -type f -print0 | xargs -0 -P 10 -n 20 ./bin/czindexer -v --redis-server awsserver
88
- You get about
89
- real 0m31.284s
76
+ | Version | Date | Summary |
77
+ |---------+-------------+------------------------------------------------------------------------|
78
+ | 0.0.8 | 04 Jun 2012 | Wildcard (*) search/better error handling of missed files/indexchecker |
79
+ | 0.0.7 | 13 May 2012 | Better documentation, mczindexer, new report command |
80
+ | 0.0.6 | 04 May 2012 | New redis-server option. Better web search with results hilight |
81
+ | 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootrstrap css |
82
+ | 0.0.4 | 12 Feb 2012 | PDF Searching |
83
+ | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
84
+ | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
85
+ | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
86
+
90
87
 
91
88
 
92
89
  * DEVELOPING
@@ -0,0 +1 @@
1
+ Wild Wild West Movie
@@ -0,0 +1,39 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # To test use
3
+ # rake TEST=test/test_wild_search.rb
4
+ require 'test/unit'
5
+ require 'code_zauker'
6
+
7
+ # See ri Test::Unit::Assertions
8
+ # for assertion documentation
9
+ class FileScannerBasicSearch < Test::Unit::TestCase
10
+ # This test can search very unique things...
11
+ def test_foolish_wild1
12
+ fs=CodeZauker::FileScanner.new()
13
+ fs.load("./test/fixture/wildtest.txt")
14
+ files=fs.wsearch("Wild*West")
15
+ assert(files.include?("./test/fixture/wildtest.txt")== true,
16
+ "Expected file not found. Files found:#{files}")
17
+
18
+ end
19
+
20
+ def test_foolish_wild2
21
+ fs=CodeZauker::FileScanner.new()
22
+ fs.load("./test/fixture/wildtest.txt")
23
+ files=fs.wsearch("Wild*West*Movie")
24
+ assert(files.include?("./test/fixture/wildtest.txt")== true,
25
+ "Expected file not found. Files found:#{files}")
26
+ end
27
+
28
+ # Also unordered match will work
29
+ # So the negative match is difficult
30
+ def test_foolish_wild3
31
+ fs=CodeZauker::FileScanner.new()
32
+ fs.load("./test/fixture/wildtest.txt")
33
+ files=fs.wsearch("West*Wild*NotOnTheSameLineForSure")
34
+ assert(files.include?("./test/fixture/wildtest.txt")== false,
35
+ "Expected not matching wildtest.txt file. Matches:#{files}")
36
+ end
37
+
38
+
39
+ end