search_in_file 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/search_in_file.rb +22 -16
- data/lib/search_in_file/settings.rb +17 -0
- data/lib/search_in_file/version.rb +1 -1
- data/search_in_file.gemspec +1 -1
- data/spec/specs/search_engine_spec.rb +42 -10
- data/spec/specs/search_files_spec.rb +15 -17
- data/spec/specs/search_phrase_spec.rb +11 -12
- metadata +4 -2
data/lib/search_in_file.rb
CHANGED
@@ -1,29 +1,36 @@
|
|
1
1
|
require "search_in_file/version"
|
2
|
-
require
|
2
|
+
require "search_in_file/settings"
|
3
3
|
|
4
4
|
require "file_parsers/txt_parser"
|
5
5
|
require "file_parsers/pdf_parser"
|
6
|
-
require "file_parsers/doc_parser"
|
6
|
+
require "file_parsers/doc_parser" if Gem.win_platform?
|
7
7
|
require "file_parsers/docx_parser"
|
8
8
|
|
9
|
+
require "find"
|
10
|
+
|
9
11
|
module SearchInFile
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
+
def self.search( path, term )
|
14
|
+
is_document?( path ) ? search_in_file( path, term ) : search_in_directory( path, term )
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.search_in_directory( path, term )
|
13
18
|
results = []
|
14
|
-
each_file_in(
|
15
|
-
|
16
|
-
|
17
|
-
file_content = parser_class.new.read_file( path )
|
18
|
-
file_paragraphs = file_content.split(/\tor\n|\n/)
|
19
|
-
# search for phrase
|
20
|
-
paragraphs = []
|
21
|
-
file_paragraphs.each{ |p| paragraphs << p if p.include?(term) }
|
22
|
-
results << {file: path, paragraphs: paragraphs} if !paragraphs.empty?
|
19
|
+
each_file_in( path ) do |f_path|
|
20
|
+
f_result = search_in_file( f_path, term )
|
21
|
+
results = results + f_result if !f_result.empty?
|
23
22
|
end
|
24
23
|
results
|
25
24
|
end
|
26
25
|
|
26
|
+
def self.search_in_file( f_path, term )
|
27
|
+
term_paragraphs = []
|
28
|
+
file_paragraphs = paragraphs_of( f_path )
|
29
|
+
# search for phrase
|
30
|
+
file_paragraphs.each{ |p| term_paragraphs << p if p.include?(term) }
|
31
|
+
term_paragraphs.empty? ? [] : [{file: f_path, paragraphs: term_paragraphs}]
|
32
|
+
end
|
33
|
+
|
27
34
|
def self.content_of file
|
28
35
|
class_name = "#{extname( file )[1..-1].capitalize}Parser"
|
29
36
|
parser_class = Object.const_get( class_name )
|
@@ -31,7 +38,7 @@ module SearchInFile
|
|
31
38
|
end
|
32
39
|
|
33
40
|
def self.paragraphs_of file
|
34
|
-
content_of( file ).split(/\
|
41
|
+
content_of( file ).split(/\r/)
|
35
42
|
end
|
36
43
|
|
37
44
|
def self.find_by_type_in d_path, f_type
|
@@ -59,8 +66,7 @@ module SearchInFile
|
|
59
66
|
end
|
60
67
|
|
61
68
|
def self.is_document? f_name
|
62
|
-
|
63
|
-
['.doc', '.docx', '.pdf', '.txt'].include? f_type
|
69
|
+
Settings.supported_docs.include?( extname(f_name) )
|
64
70
|
end
|
65
71
|
|
66
72
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'rbconfig'
|
2
|
+
|
3
|
+
module Settings
|
4
|
+
|
5
|
+
def self.supported_docs
|
6
|
+
Gem.win_platform? ? ['.doc', '.docx', '.pdf', '.txt'] : ['.docx', '.pdf', '.txt']
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.test_count
|
10
|
+
1
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.fixtures_path
|
14
|
+
File.expand_path('../../..', __FILE__) + '/spec/fixtures'
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
data/search_in_file.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["bmalets"]
|
10
10
|
spec.email = ["b.malets@gmail.com"]
|
11
11
|
spec.summary = %q{Search files with phrase by directory path}
|
12
|
-
spec.description = %q{Search
|
12
|
+
spec.description = %q{Search word or phrase in .pdf,.txt,.doc and .docx files by directory path}
|
13
13
|
spec.homepage = "https://github.com/bmalets/search_in_file/blob/master/README.md"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -4,23 +4,55 @@ describe "File Search System" do
|
|
4
4
|
|
5
5
|
describe "Search engine tests" do
|
6
6
|
|
7
|
-
|
8
|
-
it "Search phrase in directory number #{number}" do
|
7
|
+
Settings.test_count.times do |number|
|
9
8
|
|
10
|
-
|
9
|
+
it "Search phrase in directory or in file number #{number}" do
|
10
|
+
dir_path = Settings.fixtures_path
|
11
11
|
term = 'Often needs to retrieve documents on its'
|
12
12
|
results = SearchInFile.search( dir_path, term )
|
13
13
|
|
14
|
-
results.class
|
15
|
-
results.count
|
14
|
+
expect(results.class).to eq(Array)
|
15
|
+
expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
|
16
|
+
|
17
|
+
results.each do |result|
|
18
|
+
Settings.supported_docs.include? File.extname(result[:file])
|
19
|
+
File.exist?( result[:file] ).should be(true)
|
20
|
+
result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
it "Search phrase in directory number #{number}" do
|
25
|
+
dir_path = Settings.fixtures_path
|
26
|
+
term = 'Often needs to retrieve documents on its'
|
27
|
+
results = SearchInFile.search_in_directory( dir_path, term )
|
28
|
+
|
29
|
+
expect(results.class).to eq(Array)
|
30
|
+
expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
|
16
31
|
|
17
|
-
results.each do |result|
|
18
|
-
|
19
|
-
File.exist?( result[:file] ).should
|
20
|
-
|
21
|
-
result[:paragraphs].each{ |paragraph| paragraph.include? term }
|
32
|
+
results.each do |result|
|
33
|
+
Settings.supported_docs.include? File.extname(result[:file])
|
34
|
+
File.exist?( result[:file] ).should be(true)
|
35
|
+
result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
|
22
36
|
end
|
23
37
|
end
|
38
|
+
|
39
|
+
Settings.supported_docs.each do |f_type|
|
40
|
+
|
41
|
+
it "Search phrase in #{f_type} file number #{number}" do
|
42
|
+
f_path = Settings.fixtures_path + "/basic#{f_type}"
|
43
|
+
term = 'Often needs to retrieve documents on its'
|
44
|
+
results = SearchInFile.search_in_file( f_path, term )
|
45
|
+
|
46
|
+
expect(results.class).to eq(Array)
|
47
|
+
results.each do |result|
|
48
|
+
Settings.supported_docs.include? File.extname(result[:file])
|
49
|
+
expect( File.exist?( result[:file] ) ).to be true
|
50
|
+
result[:paragraphs].each{ |paragraph| paragraph.include? term }
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
24
56
|
end
|
25
57
|
|
26
58
|
end
|
@@ -4,40 +4,38 @@ describe "Folder Search System" do
|
|
4
4
|
|
5
5
|
describe "Search in folder tests" do
|
6
6
|
|
7
|
-
|
7
|
+
Settings.test_count.times do |number|
|
8
|
+
|
8
9
|
it "Search TXT files test number #{number}" do
|
9
|
-
tmp_path =
|
10
|
-
files
|
10
|
+
tmp_path = Settings.fixtures_path
|
11
|
+
files = SearchInFile.find_by_type_in( tmp_path, '.txt' )
|
11
12
|
files.each{ |f| File.extname(f) == '.txt' }
|
12
13
|
files.count.should == 2
|
13
14
|
end
|
14
|
-
end
|
15
15
|
|
16
|
-
|
16
|
+
|
17
17
|
it "Search PDF files test number #{number}" do
|
18
|
-
tmp_path =
|
18
|
+
tmp_path = Settings.fixtures_path
|
19
19
|
files = SearchInFile.find_by_type_in( tmp_path, '.pdf' )
|
20
20
|
files.each{ |f| File.extname(f) == '.pdf' }
|
21
|
-
files.count.should ==
|
21
|
+
files.count.should == 4
|
22
22
|
end
|
23
|
-
|
24
|
-
|
25
|
-
10.times do |number|
|
23
|
+
|
26
24
|
it "Search DOC files test number #{number}" do
|
27
|
-
tmp_path =
|
28
|
-
files
|
25
|
+
tmp_path = Settings.fixtures_path
|
26
|
+
files = SearchInFile.find_by_type_in( tmp_path, '.doc' )
|
29
27
|
files.each{ |f| File.extname(f) == '.doc' }
|
30
28
|
files.count.should == 3
|
31
|
-
end
|
32
|
-
|
29
|
+
end
|
30
|
+
|
33
31
|
|
34
|
-
10.times do |number|
|
35
32
|
it "Search DOCX files test number #{number}" do
|
36
|
-
tmp_path =
|
37
|
-
files
|
33
|
+
tmp_path = Settings.fixtures_path
|
34
|
+
files = SearchInFile.find_by_type_in( tmp_path, '.docx' )
|
38
35
|
files.each{ |f| File.extname(f) == '.docx' }
|
39
36
|
files.count.should == 2
|
40
37
|
end
|
38
|
+
|
41
39
|
end
|
42
40
|
|
43
41
|
end
|
@@ -4,36 +4,35 @@ describe "File Search System" do
|
|
4
4
|
|
5
5
|
describe "Document Parsers Testing" do
|
6
6
|
|
7
|
-
|
7
|
+
Settings.test_count.times do |number|
|
8
|
+
|
8
9
|
it "Read TXT files test number #{number}" do
|
9
|
-
tmp_path =
|
10
|
+
tmp_path = Settings.fixtures_path + '/basic.txt'
|
10
11
|
data_read = TxtParser.new.read_file(tmp_path)
|
11
12
|
data_read.include? 'Often needs to retrieve documents on its'
|
12
13
|
end
|
13
|
-
end
|
14
14
|
|
15
|
-
|
15
|
+
|
16
16
|
it "Read DOC files test number #{number}" do
|
17
|
-
tmp_path =
|
17
|
+
tmp_path = Settings.fixtures_path + '/basic.doc'
|
18
18
|
data_read = DocParser.new.read_file(tmp_path)
|
19
19
|
data_read.include? 'Often needs to retrieve documents on its'
|
20
|
-
end
|
21
|
-
|
20
|
+
end if Gem.win_platform?
|
21
|
+
|
22
22
|
|
23
|
-
10.times do |number|
|
24
23
|
it "Read DOCX files test number #{number}" do
|
25
|
-
tmp_path =
|
24
|
+
tmp_path = Settings.fixtures_path + '/basic.docx'
|
26
25
|
data_read = DocxParser.new.read_file(tmp_path)
|
27
26
|
data_read.include? 'Often needs to retrieve documents on its'
|
28
27
|
end
|
29
|
-
end
|
30
28
|
|
31
|
-
|
29
|
+
|
32
30
|
it "Read PDF files test number #{number}" do
|
33
|
-
tmp_path =
|
31
|
+
tmp_path = Settings.fixtures_path + '/basic.pdf'
|
34
32
|
data_read = PdfParser.new.read_file(tmp_path)
|
35
33
|
data_read.include? 'Often needs to retrieve documents on its'
|
36
34
|
end
|
35
|
+
|
37
36
|
end
|
38
37
|
|
39
38
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_in_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -91,7 +91,8 @@ dependencies:
|
|
91
91
|
- - ! '>='
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
|
-
description: Search
|
94
|
+
description: Search word or phrase in .pdf,.txt,.doc and .docx files by directory
|
95
|
+
path
|
95
96
|
email:
|
96
97
|
- b.malets@gmail.com
|
97
98
|
executables: []
|
@@ -108,6 +109,7 @@ files:
|
|
108
109
|
- lib/file_parsers/pdf_parser.rb
|
109
110
|
- lib/file_parsers/txt_parser.rb
|
110
111
|
- lib/search_in_file.rb
|
112
|
+
- lib/search_in_file/settings.rb
|
111
113
|
- lib/search_in_file/version.rb
|
112
114
|
- search_in_file.gemspec
|
113
115
|
- spec/fixtures/basic.doc
|