search_in_file 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve every gem from the public RubyGems index.
source "https://rubygems.org"

# The dependency list itself lives in search_in_file.gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 bmalets
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # SearchInFile
2
+
3
+ Search a directory tree for documents that contain a given phrase.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'search_in_file'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install search_in_file
18
+
19
+ ## Usage
20
+
21
+ To find the documents under a directory that contain a given phrase, call:
22
+
23
+ SearchInFile.search( directory_path, search_phrase )
24
+
25
+ Note:
26
+ - Windows: the phrase is searched in every .doc, .docx, .pdf and .txt file under the directory path
27
+ - OS X or Linux: the phrase is searched in every .docx, .pdf and .txt file under the directory path
28
+
29
+ ## Contributing
30
+
31
+ 1. Fork it ( http://github.com/bmalets/search_in_file/fork )
32
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
33
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
34
+ 4. Push to the branch (`git push origin my-new-feature`)
35
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Register RSpec's rake task under its default name (`spec`).
RSpec::Core::RakeTask.new

# Both `rake` (the default task) and `rake test` simply run the specs.
[:default, :test].each { |alias_name| task alias_name => :spec }
@@ -0,0 +1,23 @@
1
+ require 'win32ole'
2
+
3
# Extracts the text of a legacy Word (.doc) file by driving Microsoft Word
# through WIN32OLE.
class DocParser

  def initialize
    @content = ''
  end

  # Opens +file_path+ in Word and returns the text of all its sentences,
  # concatenated in document order.
  #
  # An already running Word instance is reused when one can be attached to;
  # otherwise a new one is started. The requested file is always the one that
  # is opened (the active document of a running instance is never read), and
  # only an instance started here is hidden and quit afterwards.
  #
  # file_path - path of the .doc file to read.
  #
  # Returns a String. Errors raised while opening or reading the document
  # propagate to the caller after cleanup has run.
  def read_file(file_path)
    word, launched = word_application
    # Hide only our own instance; a Word window the user has open stays visible.
    word.visible = false if launched
    doc = word.documents.open(file_path)

    # Start from an empty buffer so a reused parser does not return the text
    # of a previously read file as well.
    @content = ''
    doc.sentences.each { |sentence| @content << sentence.text }
    @content
  ensure
    # Runs on success and on failure, so no document or Word process is left
    # behind when reading raises.
    doc.close if doc
    word.quit if word && launched
  end

  private

  # Returns [application, launched]; +launched+ is true when a new Word
  # instance had to be started because attaching to a running one failed.
  def word_application
    [WIN32OLE.connect('Word.Application'), false]
  rescue StandardError
    [WIN32OLE.new('Word.Application'), true]
  end
end
@@ -0,0 +1,15 @@
1
+ require 'docx'
2
+
3
# Extracts the text of a .docx file using the `docx` gem.
class DocxParser

  def initialize
    @content = ''
  end

  # Reads +file_path+ and returns the text of its paragraphs, one per line.
  #
  # Paragraphs are joined with "\n" so that callers which split the result on
  # newlines get the individual paragraphs back instead of the whole document
  # fused into a single string.
  #
  # file_path - path of the .docx file to read.
  #
  # Returns a String.
  def read_file(file_path)
    document = Docx::Document.open(file_path)
    # Assign rather than append: a reused parser must not return the text of
    # a previously read file as well.
    @content = document.paragraphs.map { |paragraph| paragraph.text }.join("\n")
  end

end
@@ -0,0 +1,18 @@
1
+ require 'pdf/reader'
2
+
3
# Extracts the text of a PDF file using the `pdf-reader` gem.
class PdfParser

  def initialize
    @content = ''
  end

  # Reads +file_path+ and returns the text of all its pages in page order.
  #
  # Pages are joined with "\n" so the last line of one page is not fused with
  # the first line of the next.
  #
  # file_path - path of the PDF file to read.
  #
  # Returns a String.
  def read_file(file_path)
    PDF::Reader.open(file_path) do |reader|
      # Assign rather than append: a reused parser must not return the text
      # of a previously read file as well.
      @content = reader.pages.map { |page| page.text }.join("\n")
    end
    @content
  end

end
@@ -0,0 +1,14 @@
1
# Reads plain-text (.txt) files.
class TxtParser

  def initialize
    @content = ''
  end

  # Returns the whole content of the file at +file_path+ as a String.
  #
  # File.read opens and closes the file itself, so no file handle is left
  # open. The result is assigned rather than appended, so reading a second
  # file with the same parser does not return the first file's text as well.
  #
  # file_path - path of the text file to read.
  #
  # Raises the usual Errno::* errors when the file cannot be read.
  def read_file(file_path)
    @content = File.read(file_path)
  end

end
@@ -0,0 +1,3 @@
1
module SearchInFile
  # Release number of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "1.0.0".freeze
end
@@ -0,0 +1,66 @@
1
+ require "search_in_file/version"
2
+ require 'find'
3
+
4
+ require "file_parsers/txt_parser"
5
+ require "file_parsers/pdf_parser"
6
+ require "file_parsers/doc_parser"
7
+ require "file_parsers/docx_parser"
8
+
9
# Finds documents under a directory and searches their text for a phrase.
#
# Reading a file is delegated to a parser class whose name is derived from
# the file extension (".txt" -> TxtParser, ".pdf" -> PdfParser, ...). Each
# parser is expected to respond to #read_file(path) and return a String.
module SearchInFile

  # Extensions (lower-case, with the leading dot) treated as documents.
  DOCUMENT_TYPES = ['.doc', '.docx', '.pdf', '.txt'].freeze

  # Separator used to cut file content into paragraphs: CRLF, a lone CR or LF.
  PARAGRAPH_BREAK = /\r\n?|\n/

  # Searches every document below +dir_path+ for +term+.
  #
  # dir_path - directory to walk recursively.
  # term     - phrase to look for (plain, case-sensitive substring match).
  #
  # Returns an Array of Hashes, one per document with at least one hit:
  #   { file: <path>, paragraphs: [<paragraphs containing term>] }
  def self.search( dir_path, term )
    results = []
    each_file_in( dir_path ) do |path|
      matching = paragraphs_of( path ).select { |paragraph| paragraph.include?( term ) }
      results << { file: path, paragraphs: matching } unless matching.empty?
    end
    results
  end

  # Returns the full text of +file+, read by the parser for its extension.
  #
  # Raises ArgumentError when +file+ has no extension to pick a parser from,
  # and NameError when no parser class exists for the extension.
  def self.content_of( file )
    parser_class_for( file ).new.read_file( file )
  end

  # Returns the content of +file+ split into paragraphs (an Array of Strings).
  def self.paragraphs_of( file )
    content_of( file ).split( PARAGRAPH_BREAK )
  end

  # Returns the paths of all regular files below +d_path+ whose extension is
  # exactly +f_type+ (for example '.txt').
  def self.find_by_type_in( d_path, f_type )
    Find.find( d_path ).select { |f| File.file?( f ) && extname?( f, f_type ) }
  end

  # Returns the paths of all documents below +d_path+.
  def self.find_all_in( d_path )
    each_file_in( d_path ).to_a
  end

  # Yields the path of every document below +d_path+.
  #
  # Only regular files are yielded: Find.find also visits directories, and a
  # directory named like a document must not be handed to a parser.
  #
  # Returns an Enumerator when no block is given.
  def self.each_file_in( d_path )
    return to_enum( :each_file_in, d_path ) unless block_given?

    Find.find( d_path ) { |f| yield( f ) if File.file?( f ) && is_document?( f ) }
  end

  # Returns the extension of +file+ including the leading dot ('' if none).
  def self.extname( file )
    File.extname( file )
  end

  # True when the extension of +file+ equals +type+ exactly.
  def self.extname?( file, type )
    extname( file ) == type
  end

  # True when +f_name+ has one of the DOCUMENT_TYPES extensions. The check
  # ignores letter case, matching the parser lookup, which also normalizes
  # the case of the extension.
  def self.is_document?( f_name )
    DOCUMENT_TYPES.include?( extname( f_name ).downcase )
  end

  # Resolves the parser class for +file+ from its extension,
  # e.g. "report.PDF" -> PdfParser.
  def self.parser_class_for( file )
    suffix = extname( file )[1..-1].to_s
    if suffix.empty?
      raise ArgumentError, "cannot choose a parser for #{file.inspect}: no file extension"
    end

    Object.const_get( "#{suffix.capitalize}Parser" )
  end
  private_class_method :parser_class_for

end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'search_in_file/version'
5
+
6
# Gem packaging description for search_in_file.
Gem::Specification.new do |spec|
  spec.name          = "search_in_file"
  spec.version       = SearchInFile::VERSION
  spec.authors       = ["bmalets"]
  spec.email         = ["b.malets@gmail.com"]
  spec.summary       = %q{Search files with phrase by directory path}
  spec.description   = %q{Search files with phrase by directory path}
  spec.homepage      = "https://github.com/bmalets/search_in_file/blob/master/README.md"
  spec.license       = "MIT"

  # Package everything tracked by git; executables and test files are picked
  # out of that list by path.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/file_parsers/pdf_parser.rb and docx_parser.rb require these gems when
  # the library is loaded, so they must be installed together with the gem:
  # they are runtime dependencies, not development-only ones.
  spec.add_dependency "pdf-reader", "~> 1.3.3"
  spec.add_dependency "docx", "~> 0.2.03"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
Binary file
Binary file
Binary file
@@ -0,0 +1,9 @@
1
+ David Heinemeier Hansson extracted Ruby on Rails from his work on Basecamp, a project management tool by 37signals (now a web application company).[3]
2
+
3
+ Hansson first released Rails as open source in July 2004, but did not share commit rights to the project until February 2005.[4] In August 2006, the framework reached a milestone when Apple announced that it would ship Ruby on Rails with Mac OS X v10.5 "Leopard",[5] which was released in October 2007.
4
+ Rails version 2.3 was released on March 15, 2009 with major new developments in templates, engines, Rack and nested model forms. Templates enable the developer to generate a skeleton application with custom gems and configurations. Engines give developers the ability to reuse application pieces complete with routes, view paths and models. The Rack web server interface and Metal allow one to write optimized pieces of code that route around ActionController.[6]
5
+ On December 23, 2008, Merb, another web application framework, was launched, and Ruby on Rails announced it would work with the Merb project to bring "the best ideas of Merb" into Rails 3, ending the "unnecessary duplication" across both communities.[7] Merb was merged with Rails as part of the Rails 3.0 release.[8][9]
6
+ Rails 3.1 was released on August 31, 2011, featuring Reversible Database Migrations, Asset Pipeline, Streaming, jQuery as default JavaScript library and newly introduced CoffeeScript and Sass into the stack.[10]
7
+ Rails 3.2 was released on January 20, 2012 with a faster development mode and routing engine (also known as Journey engine), Automatic Query Explain and Tagged Logging.[11] Rails 3.2.x is the last version that supports Ruby 1.8.7.[12] Rails 3.2.12 supports Ruby 2.0[13]
8
+ Rails 4.0 was released on June 25, 2013, introducing Russian Doll Caching, Turbolinks, Live Streaming as well as making Active Resource, Active Record Observer and other components optional by splitting them as gems.[14]
9
+ Aardvark Pty. Ltd. Often needs to retrieve documents on its hard drive based on those documents containing words or phrases. It thus needs a system that allows the user to enter the required phrase or word and to then find the document/s containing that phrase and to print them to screen. The type of documents that need to be accessed are
Binary file
Binary file
@@ -0,0 +1,11 @@
1
+ David Heinemeier Hansson extracted Ruby on Rails from his work on Basecamp, a project management tool by 37signals (now a web application company).[3]
2
+
3
+ Hansson first released Rails as open source in July 2004, but did not share commit rights to the project until February 2005.[4] In August 2006, the framework reached a milestone when Apple announced that it would ship Ruby on Rails with Mac OS X v10.5 "Leopard",[5] which was released in October 2007.
4
+ Rails version 2.3 was released on March 15, 2009 with major new developments in templates, engines, Rack and nested model forms. Templates enable the developer to generate a skeleton application with custom gems and configurations. Engines give developers the ability to reuse application pieces complete with routes, view paths and models. The Rack web server interface and Metal allow one to write optimized pieces of code that route around ActionController.[6]
5
+ On December 23, 2008, Merb, another web application framework, was launched, and Ruby on Rails announced it would work with the Merb project to bring "the best ideas of Merb" into Rails 3, ending the "unnecessary duplication" across both communities.[7] Merb was merged with Rails as part of the Rails 3.0 release.[8][9]
6
+ Rails 3.1 was released on August 31, 2011, featuring Reversible Database Migrations, Asset Pipeline, Streaming, jQuery as default JavaScript library and newly introduced CoffeeScript and Sass into the stack.[10]
7
+ Rails 3.2 was released on January 20, 2012 with a faster development mode and routing engine (also known as Journey engine), Automatic Query Explain and Tagged Logging.[11] Rails 3.2.x is the last version that supports Ruby 1.8.7.[12] Rails 3.2.12 supports Ruby 2.0[13]
8
+ Rails 4.0 was released on June 25, 2013, introducing Russian Doll Caching, Turbolinks, Live Streaming as well as making Active Resource, Active Record Observer and other components optional by splitting them as gems.[14]
9
+ Aardvark Pty. Ltd. Often needs to retrieve documents on its hard drive based on those documents containing words or phrases. It thus needs a system that allows the user to enter the required phrase or word and to then find the document/s containing that phrase and to print them to screen. The type of documents that need to be accessed are
10
+
11
+ sdf
Binary file
Binary file
@@ -0,0 +1 @@
1
+ require 'search_in_file'
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
# End-to-end check of SearchInFile.search against the fixture documents.
describe "File Search System" do

  describe "Search engine tests" do

    10.times do |number|
      it "Search phrase in directory number #{number}" do
        dir_path = File.expand_path('../../fixtures', __FILE__)
        term     = 'Often needs to retrieve documents on its'
        results  = SearchInFile.search( dir_path, term )

        # These checks used to be bare comparisons whose result was thrown
        # away, so the example could never fail on them.
        results.should be_an(Array)
        # NOTE(review): 9 is the count the original (unasserted) comparison
        # named; confirm it against the fixture set now that it is enforced.
        results.count.should == 9

        results.each do |result|
          ['.doc', '.docx', '.pdf', '.txt'].should include( File.extname( result[:file] ) )
          File.exist?( result[:file] ).should == true

          result[:paragraphs].should_not be_empty
          result[:paragraphs].each { |paragraph| paragraph.should include( term ) }
        end
      end
    end

  end
end
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+
3
# Checks SearchInFile.find_by_type_in against the fixture directory tree.
describe "Folder Search System" do

  describe "Search in folder tests" do

    fixtures_path = File.expand_path('../../fixtures', __FILE__)

    # Extension => number of fixture files expected with that extension.
    expected_counts = { '.txt' => 2, '.pdf' => 3, '.doc' => 3, '.docx' => 2 }

    expected_counts.each do |type, expected_count|
      label = type[1..-1].upcase

      10.times do |number|
        it "Search #{label} files test number #{number}" do
          files = SearchInFile.find_by_type_in( fixtures_path, type )

          # Previously a bare comparison inside the block, which asserted nothing.
          files.each { |f| File.extname( f ).should == type }
          files.count.should == expected_count
        end
      end
    end

  end
end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
# Checks that each parser extracts a known phrase from its basic fixture.
describe "File Search System" do

  describe "Document Parsers Testing" do

    fixtures_path = File.expand_path('../../fixtures', __FILE__)
    phrase        = 'Often needs to retrieve documents on its'

    %w[txt doc docx pdf].each do |type|
      10.times do |number|
        it "Read #{type.upcase} files test number #{number}" do
          tmp_path = File.join( fixtures_path, "basic.#{type}" )
          # Looked up inside the example so a missing parser class fails only
          # the examples for that file type.
          parser    = Object.const_get( "#{type.capitalize}Parser" ).new
          data_read = parser.read_file( tmp_path )

          # Previously a bare `include?` whose result was discarded, so the
          # example passed whatever the parser returned.
          data_read.should include( phrase )
        end
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: search_in_file
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - bmalets
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-06-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.5'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: pdf-reader
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.3.3
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.3
62
+ - !ruby/object:Gem::Dependency
63
+ name: docx
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 0.2.03
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.2.03
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: Search files with phrase by directory path
95
+ email:
96
+ - b.malets@gmail.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - .gitignore
102
+ - Gemfile
103
+ - LICENSE.txt
104
+ - README.md
105
+ - Rakefile
106
+ - lib/file_parsers/doc_parser.rb
107
+ - lib/file_parsers/docx_parser.rb
108
+ - lib/file_parsers/pdf_parser.rb
109
+ - lib/file_parsers/txt_parser.rb
110
+ - lib/search_in_file.rb
111
+ - lib/search_in_file/version.rb
112
+ - search_in_file.gemspec
113
+ - spec/fixtures/basic.doc
114
+ - spec/fixtures/basic.docx
115
+ - spec/fixtures/basic.pdf
116
+ - spec/fixtures/basic.txt
117
+ - spec/fixtures/folder1/basic.docx
118
+ - spec/fixtures/folder1/basic1.pdf
119
+ - spec/fixtures/folder1/basic1.txt
120
+ - spec/fixtures/folder2/basic.doc
121
+ - spec/fixtures/folder2/basic2.pdf
122
+ - spec/fixtures/folder2/folder3/basic.doc
123
+ - spec/fixtures/folder2/folder3/basic.pdf
124
+ - spec/spec_helper.rb
125
+ - spec/specs/search_engine_spec.rb
126
+ - spec/specs/search_files_spec.rb
127
+ - spec/specs/search_phrase_spec.rb
128
+ homepage: https://github.com/bmalets/search_in_file/blob/master/README.md
129
+ licenses:
130
+ - MIT
131
+ post_install_message:
132
+ rdoc_options: []
133
+ require_paths:
134
+ - lib
135
+ required_ruby_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ required_rubygems_version: !ruby/object:Gem::Requirement
142
+ none: false
143
+ requirements:
144
+ - - ! '>='
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ requirements: []
148
+ rubyforge_project:
149
+ rubygems_version: 1.8.23.2
150
+ signing_key:
151
+ specification_version: 3
152
+ summary: Search files with phrase by directory path
153
+ test_files:
154
+ - spec/fixtures/basic.doc
155
+ - spec/fixtures/basic.docx
156
+ - spec/fixtures/basic.pdf
157
+ - spec/fixtures/basic.txt
158
+ - spec/fixtures/folder1/basic.docx
159
+ - spec/fixtures/folder1/basic1.pdf
160
+ - spec/fixtures/folder1/basic1.txt
161
+ - spec/fixtures/folder2/basic.doc
162
+ - spec/fixtures/folder2/basic2.pdf
163
+ - spec/fixtures/folder2/folder3/basic.doc
164
+ - spec/fixtures/folder2/folder3/basic.pdf
165
+ - spec/spec_helper.rb
166
+ - spec/specs/search_engine_spec.rb
167
+ - spec/specs/search_files_spec.rb
168
+ - spec/specs/search_phrase_spec.rb