code_zauker 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,8 +6,7 @@ module CodeZauker
6
6
  # 6000 Heuristic value used for historical reasons
7
7
  TRIGRAM_DEFAULT_PUSH_SIZE=6000
8
8
  DEFAULT_EXCLUDED_EXTENSION=[
9
- # Documents
10
- ".pdf",
9
+ # Documents
11
10
  ".xps",
12
11
  ".zip",".7z",
13
12
  # MS Office zip-like files...
@@ -20,8 +19,9 @@ module CodeZauker
20
19
  # Ruby and java stuff-like
21
20
  ".gem",
22
21
  ".jar",".class",".ear",".war",
22
+ ".mar",
23
23
  ".tar",
24
- ".gz",
24
+ ".gz",".Z",
25
25
  ".dropbox",
26
26
  ".svn-base",".pdb",".cache",
27
27
  # Music exclusion
@@ -1,3 +1,3 @@
1
1
  module CodeZauker
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/readme.org CHANGED
@@ -14,29 +14,15 @@ You need also [[http://redis.io/][redis-2.4.6]] or better.
14
14
  For a sample redis configuration see the etc/ directory of the project
15
15
 
16
16
 
17
- ** Release Notes (details)
18
- ** 0.0.3
19
- Fully implemented case insensitive search
20
- + czindexer supports reindexing, verbose output
21
- + czsearcher supports case insensitive search, file exclusion, context clipping
22
- + Now Code Zauker handle Windows ISO file in a nice way.
23
- *The whole trigram database will be stored in UTF-8*
24
- ** 0.0.2
25
- Code Cleanup, reindexing features, and sample redis server config
26
- czsearch no longer relies on unix grep, so it is easier to use with jruby
27
- czsearch/czindexer supports options
28
- ** 0.0.1
29
- First "we are here" release.
30
-
31
-
32
-
33
17
  * Release History
34
18
  | Version | Date | Summary |
35
19
  |---------+-------------+------------------------------------------------------|
36
- | 0.0.3 | | Added Case insensitive search UTF-8 trigram database |
20
+ | 0.0.4 | 12 Feb 2012 | PDF Searching |
21
+ | 0.0.3 | 03 Feb 2012 | Added Case insensitive search.UTF-8 trigram database |
37
22
  | 0.0.2 | 29 Jan 2012 | Removed dependency on unix find for czindexer. |
38
23
  | 0.0.1 | 26 Jan 2012 | First RubyGems Release (for testing purpose only) |
39
24
  | | | |
25
+ | | | |
40
26
 
41
27
 
42
28
  * DEVELOPING
Binary file
@@ -0,0 +1,38 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # To test use
3
+ # rake TEST=test/test_search.rb
4
+ require 'test/unit'
5
+ require 'code_zauker'
6
+ require 'pdf/reader'
7
+
8
+ # See ri Test::Unit::Assertions
9
+ # for assertion documentation
10
+ class FileScannerBasicSearch < Test::Unit::TestCase
11
+
12
+ # The pdf-reader adds a spurious space at the end of the text...
13
+ # perhaps it is some \r char?!...
14
+ def test_pdf_reader_simple()
15
+ reader = PDF::Reader.new("test/fixture/simple_test.pdf")
16
+ puts "PDF Ver: #{reader.pdf_version} INFO:#{reader.info}: \n#{reader.metadata}"
17
+ assert_equal "Giorgi Giovanni", reader.info[:Author]
18
+ page1=reader.page(1).text
19
+ #puts "Page 1\n:::#{page1}:::"
20
+ lines=page1.split("\n")
21
+ assert_equal "Simple PDF File generated with MSOffice 2010 ",lines[0],"Error. PDF Reader output:#{lines[0]}"
22
+ assert_equal lines[0][-1,1]," ", "Trailing whitespace bug expected"
23
+ assert_equal "Test case for Code Zauker v0.0.4+ ",lines[1]
24
+ # 4th row is about accents...
25
+ #puts ":#{lines[3]}:"
26
+ accentLine=lines[3].strip()
27
+ assert_equal accentLine,"àèéìòù"
28
+ assert_equal "UTF-8",accentLine.encoding().name
29
+ assert_equal true,accentLine.valid_encoding?()
30
+ end
31
+
32
+ def test_is_pdf()
33
+ u=CodeZauker::Util.new()
34
+ assert_equal true, u.is_pdf?("Case_crazy.PdF")
35
+ end
36
+
37
+ end
38
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: code_zauker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-03 00:00:00.000000000 Z
12
+ date: 2012-02-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yard
16
- requirement: &74500230 !ruby/object:Gem::Requirement
16
+ requirement: &85208010 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,21 @@ dependencies:
21
21
  version: '0.7'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *74500230
24
+ version_requirements: *85208010
25
+ - !ruby/object:Gem::Dependency
26
+ name: rubyzip
27
+ requirement: &85207750 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '0.9'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *85207750
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: hiredis
27
- requirement: &74499940 !ruby/object:Gem::Requirement
38
+ requirement: &85207460 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: '0.3'
33
44
  type: :runtime
34
45
  prerelease: false
35
- version_requirements: *74499940
46
+ version_requirements: *85207460
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: redis
38
- requirement: &74499670 !ruby/object:Gem::Requirement
49
+ requirement: &85207140 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,18 +54,18 @@ dependencies:
43
54
  version: '2.2'
44
55
  type: :runtime
45
56
  prerelease: false
46
- version_requirements: *74499670
57
+ version_requirements: *85207140
47
58
  - !ruby/object:Gem::Dependency
48
- name: rubyzip
49
- requirement: &74499420 !ruby/object:Gem::Requirement
59
+ name: pdf-reader
60
+ requirement: &85195530 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ~>
53
64
  - !ruby/object:Gem::Version
54
- version: '0.9'
65
+ version: 1.0.0
55
66
  type: :runtime
56
67
  prerelease: false
57
- version_requirements: *74499420
68
+ version_requirements: *85195530
58
69
  description: Code Zauker is based from ideas taken by old Google Code Search and uses
59
70
  Redis as a basic platform
60
71
  email:
@@ -68,7 +79,6 @@ extra_rdoc_files: []
68
79
  files:
69
80
  - .gitignore
70
81
  - BUGS.org
71
- - CHANGELOG.org
72
82
  - Gemfile
73
83
  - LICENSE.txt
74
84
  - Rakefile
@@ -103,7 +113,9 @@ files:
103
113
  - test/fixture/TEST_LICENSE.txt
104
114
  - test/fixture/foolish.txt
105
115
  - test/fixture/kurukku.txt
116
+ - test/fixture/simple_test.pdf
106
117
  - test/fixture/testArchive.zip
118
+ - test/test_pdf_indexing.rb
107
119
  - test/test_search.rb
108
120
  homepage: http://gioorgi.com/tag/code-zauker/
109
121
  licenses: []
data/CHANGELOG.org DELETED
@@ -1,15 +0,0 @@
1
- |2012-02-03|Updated readme|
2
- |2012-02-03|Code Zauker ready for v0.0.3 release|
3
- |2012-02-02|czindexer now offers better statistic and less verbose output|
4
- |2012-02-02|czsearch now is able to exclude file patterns|
5
- |2012-02-02|Better utf-8 handling|
6
- |2012-02-01|Correct parsing and grep-ping of no-UTF-8 files. Fixed map2ids small bug. Test ZipReading|
7
- |2012-01-31|Case insensitive search works great|
8
- |2012-01-30|Case insensitive implemented|
9
- |2012-01-30|insensitive search via special insensitive trigram|
10
- |2012-01-29|Version 0.0.2 release|
11
- |2012-01-29|Version 0.0.2|
12
- |2012-01-27|No more dependency from unix commands|
13
- |2012-01-27|Beta code zauker 0.0.2|
14
- |2012-01-27|Beta code zauker 0.0.2|
15
- |2012-01-26|Code Zauker 0.0.1|