code_zauker 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -5,3 +5,4 @@ pkg/*
5
5
  code_zauker_index.rdb
6
6
  .yardoc
7
7
  publishMe
8
+ bin/aws*
data/bin/czindexer CHANGED
@@ -2,11 +2,18 @@
2
2
  # Suggested execution is mixing find / xargs with the parallel (P) parameters:
3
3
  # find test/fixture/ -type f | xargs -P 5 -n 10 ./bin/czindexer
4
4
  # will fire 5 czindexer each with 10 files to process...
5
+ require 'code_zauker/cli'
6
+ require 'redis/connection/hiredis'
7
+ require 'redis'
5
8
  require 'optparse'
6
9
  options={}
7
10
  optparse= OptionParser.new do |opts|
8
11
  opts.banner="Usage: czindexer [options] [file1] [file2]..."
9
12
  options[:verbose] = false
13
+ options[:redis_host]="127.0.0.1"
14
+ options[:redis_port]=6379
15
+ options[:redis_password]=nil
16
+
10
17
 
11
18
  opts.on( '-v', '--verbose', 'Output more information' ) do
12
19
  options[:verbose] = true
@@ -17,6 +24,20 @@ optparse= OptionParser.new do |opts|
17
24
  options[:reindex]=true
18
25
  end
19
26
 
27
+ opts.on('-h','--redis-server pass@SERVER:port', String,
28
+ 'Specify the alternate redis server to use')do |server|
29
+ myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
30
+ options[:redis_host]=myoptions[:redis_host]
31
+ options[:redis_port]=myoptions[:redis_port]
32
+ options[:redis_password]=myoptions[:redis_password]
33
+
34
+ if options[:redis_password]
35
+ puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
36
+ else
37
+ puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
38
+ end
39
+ end
40
+
20
41
  opts.on( '-h', '--help', 'Display this screen' ) do
21
42
  puts opts
22
43
  exit
@@ -82,7 +103,8 @@ end
82
103
  begin
83
104
  $CUMULATED_TIME=0
84
105
  # Allocated here to recycle connection
85
- fs=CodeZauker::FileScanner.new()
106
+ redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
107
+ fs=CodeZauker::FileScanner.new(redisConnection)
86
108
  $PROCESSED_FILES=0
87
109
  puts "Code Zauker v#{CodeZauker::VERSION}" if options[:verbose]
88
110
  puts "Reindexing..." if options[:verbose]==true and options[:reindex]==true
data/bin/czsearch CHANGED
@@ -4,6 +4,9 @@
4
4
  $VERBOSE=nil
5
5
  require 'code_zauker'
6
6
  require 'code_zauker/grep'
7
+ require 'code_zauker/cli'
8
+ require 'redis/connection/hiredis'
9
+ require 'redis'
7
10
  require 'tempfile'
8
11
  require 'pdf/reader'
9
12
  include Grep
@@ -17,6 +20,9 @@ optparse= OptionParser.new do |opts|
17
20
  options[:postcontext]=0
18
21
  options[:extensions_to_ignore]=[]
19
22
  options[:file_to_exclude]=[]
23
+ options[:redis_host]="127.0.0.1"
24
+ options[:redis_port]=6379
25
+ options[:redis_password]=nil
20
26
 
21
27
  opts.on('-i', '--ignore-case','ignore case distinctions') do
22
28
  options[:ignorecase]=true
@@ -44,9 +50,18 @@ optparse= OptionParser.new do |opts|
44
50
  end
45
51
 
46
52
 
47
- opts.on('-h','--redis-server SERVER', String,
53
+ opts.on('-h','--redis-server pass@SERVER:port', String,
48
54
  'Specify the alternate redis server to use')do |server|
49
- raise 'Still to be done'
55
+ myoptions=CodeZauker::CliUtil.new().parse_host_options(server)
56
+ options[:redis_host]=myoptions[:redis_host]
57
+ options[:redis_port]=myoptions[:redis_port]
58
+ options[:redis_password]=myoptions[:redis_password]
59
+
60
+ if options[:redis_password]
61
+ puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]} WithPassword"
62
+ else
63
+ puts "Server: #{options[:redis_host]} Port:#{options[:redis_port]}"
64
+ end
50
65
  end
51
66
 
52
67
 
@@ -69,7 +84,8 @@ optparse.parse!
69
84
  ARGV.each do | s |
70
85
  #puts "Code Zauker Searching for #{s}"
71
86
  util=CodeZauker::Util.new()
72
- fs=CodeZauker::FileScanner.new()
87
+ redisConnection=Redis.new(:host => options[:redis_host], :port => options[:redis_port], :password=> options[:redis_password])
88
+ fs=CodeZauker::FileScanner.new(redisConnection)
73
89
  if options[:ignorecase]==false
74
90
  files=fs.search(s)
75
91
  pattern=/#{Regexp.escape(s)}/
@@ -0,0 +1,40 @@
1
module CodeZauker

  # Command-line helpers used to interpret the --redis-server option.
  class CliUtil
    # Values used when the connection string omits the corresponding part.
    DEFAULT_REDIS_HOST = "127.0.0.1"
    DEFAULT_REDIS_PORT = 6379

    # Parses a redis connection string into its components.
    #
    # Accepted forms are:
    #   "host"
    #   "host:port"
    #   "password@host"
    #   "password@host:port"
    #
    # The string is split structurally (last "@", then last ":") instead of
    # being matched against unanchored character classes, so host names
    # containing "-" or "_" and passwords containing punctuation are kept
    # intact rather than being silently truncated.
    #
    # @param connection_string [String, nil] the value given on the command line
    # @return [Hash] a hash with the keys :redis_host (String),
    #   :redis_port (Integer) and :redis_password (String or nil).
    #   Any part missing from the input keeps its default
    #   ("127.0.0.1", 6379, nil).
    def parse_host_options(connection_string)
      options = {
        :redis_host     => DEFAULT_REDIS_HOST,
        :redis_port     => DEFAULT_REDIS_PORT,
        :redis_password => nil
      }

      spec = connection_string.to_s.strip
      return options if spec.empty?

      # Split on the LAST "@" so that a password may itself contain "@".
      password, separator, address = spec.rpartition("@")
      options[:redis_password] = password unless separator.empty? || password.empty?

      host, port = split_host_and_port(address)
      options[:redis_host] = host unless host.empty?
      # Only override the default when a numeric port was really supplied;
      # "host:" therefore keeps the default port instead of storing nil.
      options[:redis_port] = port if port
      options
    end

    private

    # Splits "host:port" into [host, Integer port].
    # Returns [address, nil] when there is no trailing ":<digits>" part, and
    # [host, nil] when the port after the colon is empty.
    def split_host_and_port(address)
      match = /\A(.*):(\d*)\z/.match(address)
      return [address, nil] unless match

      digits = match[2]
      [match[1], digits.empty? ? nil : digits.to_i]
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  module CodeZauker
2
- MAX_PUSH_TRIGRAM_RETRIES=3
2
+ # Under Amazon AWS, a lot of timeouts can happen.
3
+ # We use a higher retry count here
4
+ MAX_PUSH_TRIGRAM_RETRIES=15
3
5
  # Stats
4
6
  # It is difficult to decide what is the best trigram push size.
5
7
  # a larger one ensure a best in memory processing but can lead to longer transactions
@@ -1,3 +1,3 @@
1
1
  module CodeZauker
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -3,6 +3,8 @@ require 'sinatra'
3
3
  require "code_zauker/version"
4
4
  require "code_zauker"
5
5
  require "erb"
6
+ require 'code_zauker/grep'
7
+ include Grep
6
8
 
7
9
  # See http://www.sinatrarb.com/intro
8
10
  get '/' do
@@ -14,7 +16,22 @@ get '/search' do
14
16
  # Process the search and show the results...
15
17
  fs=CodeZauker::FileScanner.new()
16
18
  files=fs.isearch(params[:q])
17
- erb :show_results, :locals => {:files => files, :q => params[:q] }
19
+ util=CodeZauker::Util.new()
20
+ abstracts=[]
21
+ files.each do |f|
22
+ if util.is_pdf?(f)==false
23
+ askedQuery=params[:q]
24
+ pattern=/#{Regexp.escape(askedQuery)}/i
25
+ lines=grep(f,pattern, pre_context=2, post_context=2);
26
+ desc=""
27
+ lines.each do |l |
28
+ hilighted=l.gsub(/(#{Regexp.escape(askedQuery)})/i){ "<b>#{$1}</b>"}
29
+ desc=desc+ "#{f}:#{hilighted}\n"
30
+ end
31
+ abstracts.push(desc)
32
+ end
33
+ end
34
+ erb :show_results, :locals => {:files => abstracts, :q => params[:q] }
18
35
  end
19
36
 
20
37
  configure do
data/readme.org CHANGED
@@ -1,6 +1,6 @@
1
1
  * Code Zauker: your code, indexed
2
2
  Code Zauker is a search engine for programming languages.
3
- Code Zauker is based from ideas taken by old Google Code Search and uses Redis as a basic platform
3
+ Code Zauker is based on ideas taken from the old Google Code Search and uses Redis as its storage engine.
4
4
 
5
5
  For news and discussion: http://gioorgi.com/tag/code-zauker/
6
6
 
@@ -13,17 +13,33 @@ To install Code Zauker,simply issue
13
13
  You need also [[http://redis.io/][redis-2.4.6]] or better.
14
14
  For a sample redis configuration see the etc/ directory of the project
15
15
 
16
+ * Try it out
17
+ Take a look at the czindexer and czsearch command help for usage patterns.
18
+
19
+
16
20
 
17
21
  * Release History
18
- | Version | Date | Summary |
19
- |---------+-------------+---------------------------------------------------------------|
20
- | 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootrstrap css |
21
- | 0.0.4 | 12 Feb 2012 | PDF Searching |
22
- | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
23
- | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
24
- | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
25
- | | | |
26
- | | | |
22
+ | Version | Date | Summary |
23
+ |---------+-------------+-----------------------------------------------------------------|
24
+ | 0.0.6 | 04 May 2012 | New redis-server option. Better web search with results hilight |
25
+ | 0.0.5 | 09 Apr 2012 | Added Sinatra-based web search page, featuring bootrstrap css |
26
+ | 0.0.4 | 12 Feb 2012 | PDF Searching |
27
+ | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
28
+ | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
29
+ | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
30
+ | | | |
31
+ | | | |
32
+
33
+ * Aws tests
34
+ ** Micro instance
35
+ Without multiplexing you get
36
+ real 4m39.599s
37
+ for indexing code_zauker
38
+
39
+ With
40
+ time find . -type f -print0 | xargs -0 -P 10 -n 20 ./bin/czindexer -v --redis-server awsserver
41
+ You get about
42
+ real 0m31.284s
27
43
 
28
44
 
29
45
  * DEVELOPING
@@ -32,15 +32,20 @@
32
32
 
33
33
  </form>
34
34
  </p>
35
+ <style>
36
+ b{
37
+ color: #A1A100;
38
+ }
39
+ </style>
35
40
  <span class="badge badge-success"> <%=files.length%> Results</span>
36
41
  <ol>
37
42
  <%
38
43
  files.each do |f|
39
- %>
44
+ %>
40
45
  <li><%=f%>
41
- <%
46
+ <%
42
47
  end
43
- %>
48
+ %>
44
49
  </ol>
45
50
  </div>
46
51
  </div>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: code_zauker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-09 00:00:00.000000000 Z
12
+ date: 2012-05-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yard
16
- requirement: &73314420 !ruby/object:Gem::Requirement
16
+ requirement: &74198570 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.7'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *73314420
24
+ version_requirements: *74198570
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rubyzip
27
- requirement: &73314170 !ruby/object:Gem::Requirement
27
+ requirement: &74197690 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.9'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *73314170
35
+ version_requirements: *74197690
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hiredis
38
- requirement: &73313940 !ruby/object:Gem::Requirement
38
+ requirement: &74196880 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.3'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *73313940
46
+ version_requirements: *74196880
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: redis
49
- requirement: &73313690 !ruby/object:Gem::Requirement
49
+ requirement: &74196090 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '2.2'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *73313690
57
+ version_requirements: *74196090
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: pdf-reader
60
- requirement: &73313440 !ruby/object:Gem::Requirement
60
+ requirement: &74187710 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *73313440
68
+ version_requirements: *74187710
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: sinatra
71
- requirement: &73313150 !ruby/object:Gem::Requirement
71
+ requirement: &74186780 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '1.3'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *73313150
79
+ version_requirements: *74186780
80
80
  description: Code Zauker is based from ideas taken by old Google Code Search and uses
81
81
  Redis as a basic platform
82
82
  email:
@@ -129,6 +129,7 @@ files:
129
129
  - htdocs/js/bootstrap.js
130
130
  - htdocs/js/bootstrap.min.js
131
131
  - lib/code_zauker.rb
132
+ - lib/code_zauker/cli.rb
132
133
  - lib/code_zauker/constants.rb
133
134
  - lib/code_zauker/grep.rb
134
135
  - lib/code_zauker/version.rb
@@ -163,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
164
  version: '0'
164
165
  requirements: []
165
166
  rubyforge_project: code_zauker
166
- rubygems_version: 1.8.10
167
+ rubygems_version: 1.8.11
167
168
  signing_key:
168
169
  specification_version: 3
169
170
  summary: A search engine for programming languages