search_in_file 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/search_in_file.rb +22 -16
- data/lib/search_in_file/settings.rb +17 -0
- data/lib/search_in_file/version.rb +1 -1
- data/search_in_file.gemspec +1 -1
- data/spec/specs/search_engine_spec.rb +42 -10
- data/spec/specs/search_files_spec.rb +15 -17
- data/spec/specs/search_phrase_spec.rb +11 -12
- metadata +4 -2
data/lib/search_in_file.rb
CHANGED
@@ -1,29 +1,36 @@
|
|
1
1
|
require "search_in_file/version"
|
2
|
-
require
|
2
|
+
require "search_in_file/settings"
|
3
3
|
|
4
4
|
require "file_parsers/txt_parser"
|
5
5
|
require "file_parsers/pdf_parser"
|
6
|
-
require "file_parsers/doc_parser"
|
6
|
+
require "file_parsers/doc_parser" if Gem.win_platform?
|
7
7
|
require "file_parsers/docx_parser"
|
8
8
|
|
9
|
+
require "find"
|
10
|
+
|
9
11
|
module SearchInFile
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
+
def self.search( path, term )
|
14
|
+
is_document?( path ) ? search_in_file( path, term ) : search_in_directory( path, term )
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.search_in_directory( path, term )
|
13
18
|
results = []
|
14
|
-
each_file_in(
|
15
|
-
|
16
|
-
|
17
|
-
file_content = parser_class.new.read_file( path )
|
18
|
-
file_paragraphs = file_content.split(/\tor\n|\n/)
|
19
|
-
# search for phrase
|
20
|
-
paragraphs = []
|
21
|
-
file_paragraphs.each{ |p| paragraphs << p if p.include?(term) }
|
22
|
-
results << {file: path, paragraphs: paragraphs} if !paragraphs.empty?
|
19
|
+
each_file_in( path ) do |f_path|
|
20
|
+
f_result = search_in_file( f_path, term )
|
21
|
+
results = results + f_result if !f_result.empty?
|
23
22
|
end
|
24
23
|
results
|
25
24
|
end
|
26
25
|
|
26
|
+
def self.search_in_file( f_path, term )
|
27
|
+
term_paragraphs = []
|
28
|
+
file_paragraphs = paragraphs_of( f_path )
|
29
|
+
# search for phrase
|
30
|
+
file_paragraphs.each{ |p| term_paragraphs << p if p.include?(term) }
|
31
|
+
term_paragraphs.empty? ? [] : [{file: f_path, paragraphs: term_paragraphs}]
|
32
|
+
end
|
33
|
+
|
27
34
|
def self.content_of file
|
28
35
|
class_name = "#{extname( file )[1..-1].capitalize}Parser"
|
29
36
|
parser_class = Object.const_get( class_name )
|
@@ -31,7 +38,7 @@ module SearchInFile
|
|
31
38
|
end
|
32
39
|
|
33
40
|
def self.paragraphs_of file
|
34
|
-
content_of( file ).split(/\
|
41
|
+
content_of( file ).split(/\r/)
|
35
42
|
end
|
36
43
|
|
37
44
|
def self.find_by_type_in d_path, f_type
|
@@ -59,8 +66,7 @@ module SearchInFile
|
|
59
66
|
end
|
60
67
|
|
61
68
|
def self.is_document? f_name
|
62
|
-
|
63
|
-
['.doc', '.docx', '.pdf', '.txt'].include? f_type
|
69
|
+
Settings.supported_docs.include?( extname(f_name) )
|
64
70
|
end
|
65
71
|
|
66
72
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'rbconfig'
|
2
|
+
|
3
|
+
module Settings
|
4
|
+
|
5
|
+
def self.supported_docs
|
6
|
+
Gem.win_platform? ? ['.doc', '.docx', '.pdf', '.txt'] : ['.docx', '.pdf', '.txt']
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.test_count
|
10
|
+
1
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.fixtures_path
|
14
|
+
File.expand_path('../../..', __FILE__) + '/spec/fixtures'
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
data/search_in_file.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["bmalets"]
|
10
10
|
spec.email = ["b.malets@gmail.com"]
|
11
11
|
spec.summary = %q{Search files with phrase by directory path}
|
12
|
-
spec.description = %q{Search
|
12
|
+
spec.description = %q{Search word or phrase in .pdf,.txt,.doc and .docx files by directory path}
|
13
13
|
spec.homepage = "https://github.com/bmalets/search_in_file/blob/master/README.md"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -4,23 +4,55 @@ describe "File Search System" do
|
|
4
4
|
|
5
5
|
describe "Search engine tests" do
|
6
6
|
|
7
|
-
|
8
|
-
it "Search phrase in directory number #{number}" do
|
7
|
+
Settings.test_count.times do |number|
|
9
8
|
|
10
|
-
|
9
|
+
it "Search phrase in directory or in file number #{number}" do
|
10
|
+
dir_path = Settings.fixtures_path
|
11
11
|
term = 'Often needs to retrieve documents on its'
|
12
12
|
results = SearchInFile.search( dir_path, term )
|
13
13
|
|
14
|
-
results.class
|
15
|
-
results.count
|
14
|
+
expect(results.class).to eq(Array)
|
15
|
+
expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
|
16
|
+
|
17
|
+
results.each do |result|
|
18
|
+
Settings.supported_docs.include? File.extname(result[:file])
|
19
|
+
File.exist?( result[:file] ).should be(true)
|
20
|
+
result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
it "Search phrase in directory number #{number}" do
|
25
|
+
dir_path = Settings.fixtures_path
|
26
|
+
term = 'Often needs to retrieve documents on its'
|
27
|
+
results = SearchInFile.search_in_directory( dir_path, term )
|
28
|
+
|
29
|
+
expect(results.class).to eq(Array)
|
30
|
+
expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
|
16
31
|
|
17
|
-
results.each do |result|
|
18
|
-
|
19
|
-
File.exist?( result[:file] ).should
|
20
|
-
|
21
|
-
result[:paragraphs].each{ |paragraph| paragraph.include? term }
|
32
|
+
results.each do |result|
|
33
|
+
Settings.supported_docs.include? File.extname(result[:file])
|
34
|
+
File.exist?( result[:file] ).should be(true)
|
35
|
+
result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
|
22
36
|
end
|
23
37
|
end
|
38
|
+
|
39
|
+
Settings.supported_docs.each do |f_type|
|
40
|
+
|
41
|
+
it "Search phrase in #{f_type} file number #{number}" do
|
42
|
+
f_path = Settings.fixtures_path + "/basic#{f_type}"
|
43
|
+
term = 'Often needs to retrieve documents on its'
|
44
|
+
results = SearchInFile.search_in_file( f_path, term )
|
45
|
+
|
46
|
+
expect(results.class).to eq(Array)
|
47
|
+
results.each do |result|
|
48
|
+
Settings.supported_docs.include? File.extname(result[:file])
|
49
|
+
expect( File.exist?( result[:file] ) ).to be true
|
50
|
+
result[:paragraphs].each{ |paragraph| paragraph.include? term }
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
24
56
|
end
|
25
57
|
|
26
58
|
end
|
@@ -4,40 +4,38 @@ describe "Folder Search System" do
|
|
4
4
|
|
5
5
|
describe "Search in folder tests" do
|
6
6
|
|
7
|
-
|
7
|
+
Settings.test_count.times do |number|
|
8
|
+
|
8
9
|
it "Search TXT files test number #{number}" do
|
9
|
-
tmp_path =
|
10
|
-
files
|
10
|
+
tmp_path = Settings.fixtures_path
|
11
|
+
files = SearchInFile.find_by_type_in( tmp_path, '.txt' )
|
11
12
|
files.each{ |f| File.extname(f) == '.txt' }
|
12
13
|
files.count.should == 2
|
13
14
|
end
|
14
|
-
end
|
15
15
|
|
16
|
-
|
16
|
+
|
17
17
|
it "Search PDF files test number #{number}" do
|
18
|
-
tmp_path =
|
18
|
+
tmp_path = Settings.fixtures_path
|
19
19
|
files = SearchInFile.find_by_type_in( tmp_path, '.pdf' )
|
20
20
|
files.each{ |f| File.extname(f) == '.pdf' }
|
21
|
-
files.count.should ==
|
21
|
+
files.count.should == 4
|
22
22
|
end
|
23
|
-
|
24
|
-
|
25
|
-
10.times do |number|
|
23
|
+
|
26
24
|
it "Search DOC files test number #{number}" do
|
27
|
-
tmp_path =
|
28
|
-
files
|
25
|
+
tmp_path = Settings.fixtures_path
|
26
|
+
files = SearchInFile.find_by_type_in( tmp_path, '.doc' )
|
29
27
|
files.each{ |f| File.extname(f) == '.doc' }
|
30
28
|
files.count.should == 3
|
31
|
-
end
|
32
|
-
|
29
|
+
end
|
30
|
+
|
33
31
|
|
34
|
-
10.times do |number|
|
35
32
|
it "Search DOCX files test number #{number}" do
|
36
|
-
tmp_path =
|
37
|
-
files
|
33
|
+
tmp_path = Settings.fixtures_path
|
34
|
+
files = SearchInFile.find_by_type_in( tmp_path, '.docx' )
|
38
35
|
files.each{ |f| File.extname(f) == '.docx' }
|
39
36
|
files.count.should == 2
|
40
37
|
end
|
38
|
+
|
41
39
|
end
|
42
40
|
|
43
41
|
end
|
@@ -4,36 +4,35 @@ describe "File Search System" do
|
|
4
4
|
|
5
5
|
describe "Document Parsers Testing" do
|
6
6
|
|
7
|
-
|
7
|
+
Settings.test_count.times do |number|
|
8
|
+
|
8
9
|
it "Read TXT files test number #{number}" do
|
9
|
-
tmp_path =
|
10
|
+
tmp_path = Settings.fixtures_path + '/basic.txt'
|
10
11
|
data_read = TxtParser.new.read_file(tmp_path)
|
11
12
|
data_read.include? 'Often needs to retrieve documents on its'
|
12
13
|
end
|
13
|
-
end
|
14
14
|
|
15
|
-
|
15
|
+
|
16
16
|
it "Read DOC files test number #{number}" do
|
17
|
-
tmp_path =
|
17
|
+
tmp_path = Settings.fixtures_path + '/basic.doc'
|
18
18
|
data_read = DocParser.new.read_file(tmp_path)
|
19
19
|
data_read.include? 'Often needs to retrieve documents on its'
|
20
|
-
end
|
21
|
-
|
20
|
+
end if Gem.win_platform?
|
21
|
+
|
22
22
|
|
23
|
-
10.times do |number|
|
24
23
|
it "Read DOCX files test number #{number}" do
|
25
|
-
tmp_path =
|
24
|
+
tmp_path = Settings.fixtures_path + '/basic.docx'
|
26
25
|
data_read = DocxParser.new.read_file(tmp_path)
|
27
26
|
data_read.include? 'Often needs to retrieve documents on its'
|
28
27
|
end
|
29
|
-
end
|
30
28
|
|
31
|
-
|
29
|
+
|
32
30
|
it "Read PDF files test number #{number}" do
|
33
|
-
tmp_path =
|
31
|
+
tmp_path = Settings.fixtures_path + '/basic.pdf'
|
34
32
|
data_read = PdfParser.new.read_file(tmp_path)
|
35
33
|
data_read.include? 'Often needs to retrieve documents on its'
|
36
34
|
end
|
35
|
+
|
37
36
|
end
|
38
37
|
|
39
38
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_in_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -91,7 +91,8 @@ dependencies:
|
|
91
91
|
- - ! '>='
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
|
-
description: Search
|
94
|
+
description: Search word or phrase in .pdf,.txt,.doc and .docx files by directory
|
95
|
+
path
|
95
96
|
email:
|
96
97
|
- b.malets@gmail.com
|
97
98
|
executables: []
|
@@ -108,6 +109,7 @@ files:
|
|
108
109
|
- lib/file_parsers/pdf_parser.rb
|
109
110
|
- lib/file_parsers/txt_parser.rb
|
110
111
|
- lib/search_in_file.rb
|
112
|
+
- lib/search_in_file/settings.rb
|
111
113
|
- lib/search_in_file/version.rb
|
112
114
|
- search_in_file.gemspec
|
113
115
|
- spec/fixtures/basic.doc
|