code_zauker 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +3 -3
- data/bin/czindexer +0 -0
- data/bin/czsearch +22 -3
- data/code_zauker.gemspec +3 -1
- data/doc/CodeZauker.html +10 -8
- data/doc/CodeZauker/FileScanner.html +173 -171
- data/doc/CodeZauker/Util.html +184 -9
- data/doc/Grep.html +5 -3
- data/doc/_index.html +4 -4
- data/doc/frames.html +1 -1
- data/doc/index.html +4 -4
- data/doc/js/full_list.js +6 -0
- data/doc/method_list.html +17 -1
- data/doc/top-level-namespace.html +5 -3
- data/lib/code_zauker.rb +61 -25
- data/lib/code_zauker/constants.rb +3 -3
- data/lib/code_zauker/version.rb +1 -1
- data/readme.org +3 -17
- data/test/fixture/simple_test.pdf +0 -0
- data/test/test_pdf_indexing.rb +38 -0
- metadata +25 -13
- data/CHANGELOG.org +0 -15
@@ -6,8 +6,7 @@ module CodeZauker
|
|
6
6
|
# 6000: heuristic value used for historical reasons
|
7
7
|
TRIGRAM_DEFAULT_PUSH_SIZE=6000
|
8
8
|
DEFAULT_EXCLUDED_EXTENSION=[
|
9
|
-
# Documents
|
10
|
-
".pdf",
|
9
|
+
# Documents
|
11
10
|
".xps",
|
12
11
|
".zip",".7z",
|
13
12
|
# MS Office zip-like files...
|
@@ -20,8 +19,9 @@ module CodeZauker
|
|
20
19
|
# Ruby and java stuff-like
|
21
20
|
".gem",
|
22
21
|
".jar",".class",".ear",".war",
|
22
|
+
".mar",
|
23
23
|
".tar",
|
24
|
-
".gz",
|
24
|
+
".gz",".Z",
|
25
25
|
".dropbox",
|
26
26
|
".svn-base",".pdb",".cache",
|
27
27
|
# Music exclusion
|
data/lib/code_zauker/version.rb
CHANGED
data/readme.org
CHANGED
@@ -14,29 +14,15 @@ You need also [[http://redis.io/][redis-2.4.6]] or better.
|
|
14
14
|
For a sample redis configuration see the etc/ directory of the project
|
15
15
|
|
16
16
|
|
17
|
-
** Release Notes (details)
|
18
|
-
** 0.0.3
|
19
|
-
Fully implemented case insensitive search
|
20
|
-
+ czindexer supports reindexing, verbose output
|
21
|
-
+ czsearcher supports case insenstive search, file exclusion, context clipping
|
22
|
-
+ Now Code Zauker handle Windows ISO file in a nice way.
|
23
|
-
*The whole trigram database will be stored in UTF-8*
|
24
|
-
** 0.0.2
|
25
|
-
Code Cleanup, reindexing features, and sample redis server config
|
26
|
-
czsearch now do not relay on unix grep, so it is easier to use with jruby
|
27
|
-
czsearch/czindexer supports options
|
28
|
-
** 0.0.1
|
29
|
-
First "we are here" release.
|
30
|
-
|
31
|
-
|
32
|
-
|
33
17
|
* Release History
|
34
18
|
| Version | Date | Summary |
|
35
19
|
|---------+-------------+------------------------------------------------------|
|
36
|
-
| 0.0.
|
20
|
+
| 0.0.4 | 12 Feb 2012 | PDF Searching |
|
21
|
+
| 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
|
37
22
|
| 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
|
38
23
|
| 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
|
39
24
|
| | | |
|
25
|
+
| | | |
|
40
26
|
|
41
27
|
|
42
28
|
* DEVELOPING
|
Binary file
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
# To test use
|
3
|
+
# rake TEST=test/test_pdf_indexing.rb
|
4
|
+
require 'test/unit'
|
5
|
+
require 'code_zauker'
|
6
|
+
require 'pdf/reader'
|
7
|
+
|
8
|
+
# See ri Test::Unit::Assertions
|
9
|
+
# for assertion documentation
|
10
|
+
class FileScannerBasicSearch < Test::Unit::TestCase
|
11
|
+
|
12
|
+
# The pdf-reader adds a spurious space at the end of the text...
|
13
|
+
# perhaps it is some \r char?!...
|
14
|
+
def test_pdf_reader_simple()
|
15
|
+
reader = PDF::Reader.new("test/fixture/simple_test.pdf")
|
16
|
+
puts "PDF Ver: #{reader.pdf_version} INFO:#{reader.info}: \n#{reader.metadata}"
|
17
|
+
assert_equal "Giorgi Giovanni", reader.info[:Author]
|
18
|
+
page1=reader.page(1).text
|
19
|
+
#puts "Page 1\n:::#{page1}:::"
|
20
|
+
lines=page1.split("\n")
|
21
|
+
assert_equal "Simple PDF File generated with MSOffice 2010 ",lines[0],"Error. PDF Reader output:#{lines[0]}"
|
22
|
+
assert_equal lines[0][-1,1]," ", "Trailing whitespace bug expected"
|
23
|
+
assert_equal "Test case for Code Zauker v0.0.4+ ",lines[1]
|
24
|
+
# 4th row is about accents...
|
25
|
+
#puts ":#{lines[3]}:"
|
26
|
+
accentLine=lines[3].strip()
|
27
|
+
assert_equal accentLine,"àèéìòù"
|
28
|
+
assert_equal "UTF-8",accentLine.encoding().name
|
29
|
+
assert_equal true,accentLine.valid_encoding?()
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_is_pdf()
|
33
|
+
u=CodeZauker::Util.new()
|
34
|
+
assert_equal true, u.is_pdf?("Case_crazy.PdF")
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: code_zauker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yard
|
16
|
-
requirement: &
|
16
|
+
requirement: &85208010 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: '0.7'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *85208010
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rubyzip
|
27
|
+
requirement: &85207750 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0.9'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *85207750
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: hiredis
|
27
|
-
requirement: &
|
38
|
+
requirement: &85207460 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ~>
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: '0.3'
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *85207460
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: redis
|
38
|
-
requirement: &
|
49
|
+
requirement: &85207140 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ~>
|
@@ -43,18 +54,18 @@ dependencies:
|
|
43
54
|
version: '2.2'
|
44
55
|
type: :runtime
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *85207140
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
49
|
-
requirement: &
|
59
|
+
name: pdf-reader
|
60
|
+
requirement: &85195530 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ~>
|
53
64
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
65
|
+
version: 1.0.0
|
55
66
|
type: :runtime
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *85195530
|
58
69
|
description: Code Zauker is based from ideas taken by old Google Code Search and uses
|
59
70
|
Redis as a basic platform
|
60
71
|
email:
|
@@ -68,7 +79,6 @@ extra_rdoc_files: []
|
|
68
79
|
files:
|
69
80
|
- .gitignore
|
70
81
|
- BUGS.org
|
71
|
-
- CHANGELOG.org
|
72
82
|
- Gemfile
|
73
83
|
- LICENSE.txt
|
74
84
|
- Rakefile
|
@@ -103,7 +113,9 @@ files:
|
|
103
113
|
- test/fixture/TEST_LICENSE.txt
|
104
114
|
- test/fixture/foolish.txt
|
105
115
|
- test/fixture/kurukku.txt
|
116
|
+
- test/fixture/simple_test.pdf
|
106
117
|
- test/fixture/testArchive.zip
|
118
|
+
- test/test_pdf_indexing.rb
|
107
119
|
- test/test_search.rb
|
108
120
|
homepage: http://gioorgi.com/tag/code-zauker/
|
109
121
|
licenses: []
|
data/CHANGELOG.org
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
|2012-02-03|Updated readme|
|
2
|
-
|2012-02-03|Code Zauker ready for v0.0.3 release|
|
3
|
-
|2012-02-02|czindexer now offers better statistic and less verbose output|
|
4
|
-
|2012-02-02|czsearch now is able to exclude file patterns|
|
5
|
-
|2012-02-02|Better utf-8 handling|
|
6
|
-
|2012-02-01|Correct parsing and grep-ping of no-UTF-8 files. Fixed map2ids small bug. Test ZipReading|
|
7
|
-
|2012-01-31|Case insensitive search works great|
|
8
|
-
|2012-01-30|Case insensitive implemented|
|
9
|
-
|2012-01-30|insensitive search via special insensitive trigram|
|
10
|
-
|2012-01-29|Version 0.0.2 release|
|
11
|
-
|2012-01-29|Version 0.0.2|
|
12
|
-
|2012-01-27|No more dependency from unix commands|
|
13
|
-
|2012-01-27|Beta code zauker 0.0.2|
|
14
|
-
|2012-01-27|Beta code zauker 0.0.2|
|
15
|
-
|2012-01-26|Code Zauker 0.0.1|
|