search_in_file 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,29 +1,36 @@
1
1
  require "search_in_file/version"
2
- require 'find'
2
+ require "search_in_file/settings"
3
3
 
4
4
  require "file_parsers/txt_parser"
5
5
  require "file_parsers/pdf_parser"
6
- require "file_parsers/doc_parser"
6
+ require "file_parsers/doc_parser" if Gem.win_platform?
7
7
  require "file_parsers/docx_parser"
8
8
 
9
+ require "find"
10
+
9
11
  module SearchInFile
10
12
 
11
- # search files by phrase
12
- def self.search( dir_path, term )
13
+ def self.search( path, term )
14
+ is_document?( path ) ? search_in_file( path, term ) : search_in_directory( path, term )
15
+ end
16
+
17
+ def self.search_in_directory( path, term )
13
18
  results = []
14
- each_file_in( dir_path ) do |path|
15
- # read file
16
- parser_class = Object.const_get( "#{File.extname( path )[1..-1].capitalize}Parser" )
17
- file_content = parser_class.new.read_file( path )
18
- file_paragraphs = file_content.split(/\tor\n|\n/)
19
- # search for phrase
20
- paragraphs = []
21
- file_paragraphs.each{ |p| paragraphs << p if p.include?(term) }
22
- results << {file: path, paragraphs: paragraphs} if !paragraphs.empty?
19
+ each_file_in( path ) do |f_path|
20
+ f_result = search_in_file( f_path, term )
21
+ results = results + f_result if !f_result.empty?
23
22
  end
24
23
  results
25
24
  end
26
25
 
26
+ def self.search_in_file( f_path, term )
27
+ term_paragraphs = []
28
+ file_paragraphs = paragraphs_of( f_path )
29
+ # search for phrase
30
+ file_paragraphs.each{ |p| term_paragraphs << p if p.include?(term) }
31
+ term_paragraphs.empty? ? [] : [{file: f_path, paragraphs: term_paragraphs}]
32
+ end
33
+
27
34
  def self.content_of file
28
35
  class_name = "#{extname( file )[1..-1].capitalize}Parser"
29
36
  parser_class = Object.const_get( class_name )
@@ -31,7 +38,7 @@ module SearchInFile
31
38
  end
32
39
 
33
40
  def self.paragraphs_of file
34
- content_of( file ).split(/\tor\n|\n/)
41
+ content_of( file ).split(/\r/)
35
42
  end
36
43
 
37
44
  def self.find_by_type_in d_path, f_type
@@ -59,8 +66,7 @@ module SearchInFile
59
66
  end
60
67
 
61
68
  def self.is_document? f_name
62
- f_type = extname( f_name )
63
- ['.doc', '.docx', '.pdf', '.txt'].include? f_type
69
+ Settings.supported_docs.include?( extname(f_name) )
64
70
  end
65
71
 
66
72
  end
@@ -0,0 +1,17 @@
1
+ require 'rbconfig'
2
+
3
+ module Settings
4
+
5
+ def self.supported_docs
6
+ Gem.win_platform? ? ['.doc', '.docx', '.pdf', '.txt'] : ['.docx', '.pdf', '.txt']
7
+ end
8
+
9
+ def self.test_count
10
+ 1
11
+ end
12
+
13
+ def self.fixtures_path
14
+ File.expand_path('../../..', __FILE__) + '/spec/fixtures'
15
+ end
16
+
17
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchInFile
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["bmalets"]
10
10
  spec.email = ["b.malets@gmail.com"]
11
11
  spec.summary = %q{Search files with phrase by directory path}
12
- spec.description = %q{Search files with phrase by directory path}
12
+ spec.description = %q{Search word or phrase in .pdf,.txt,.doc and .docx files by directory path}
13
13
  spec.homepage = "https://github.com/bmalets/search_in_file/blob/master/README.md"
14
14
  spec.license = "MIT"
15
15
 
@@ -4,23 +4,55 @@ describe "File Search System" do
4
4
 
5
5
  describe "Search engine tests" do
6
6
 
7
- 10.times do |number|
8
- it "Search phrase in directory number #{number}" do
7
+ Settings.test_count.times do |number|
9
8
 
10
- dir_path = File.expand_path('../..', __FILE__) + '/fixtures'
9
+ it "Search phrase in directory or in file number #{number}" do
10
+ dir_path = Settings.fixtures_path
11
11
  term = 'Often needs to retrieve documents on its'
12
12
  results = SearchInFile.search( dir_path, term )
13
13
 
14
- results.class == 'Array'
15
- results.count == 9
14
+ expect(results.class).to eq(Array)
15
+ expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
16
+
17
+ results.each do |result|
18
+ Settings.supported_docs.include? File.extname(result[:file])
19
+ File.exist?( result[:file] ).should be(true)
20
+ result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
21
+ end
22
+ end
23
+
24
+ it "Search phrase in directory number #{number}" do
25
+ dir_path = Settings.fixtures_path
26
+ term = 'Often needs to retrieve documents on its'
27
+ results = SearchInFile.search_in_directory( dir_path, term )
28
+
29
+ expect(results.class).to eq(Array)
30
+ expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
16
31
 
17
- results.each do |result|
18
- ['.doc', '.docx', '.pdf', '.txt'].include? File.extname(result[:file])
19
- File.exist?( result[:file] ).should == true
20
-
21
- result[:paragraphs].each{ |paragraph| paragraph.include? term }
32
+ results.each do |result|
33
+ Settings.supported_docs.include? File.extname(result[:file])
34
+ File.exist?( result[:file] ).should be(true)
35
+ result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
22
36
  end
23
37
  end
38
+
39
+ Settings.supported_docs.each do |f_type|
40
+
41
+ it "Search phrase in #{f_type} file number #{number}" do
42
+ f_path = Settings.fixtures_path + "/basic#{f_type}"
43
+ term = 'Often needs to retrieve documents on its'
44
+ results = SearchInFile.search_in_file( f_path, term )
45
+
46
+ expect(results.class).to eq(Array)
47
+ results.each do |result|
48
+ Settings.supported_docs.include? File.extname(result[:file])
49
+ expect( File.exist?( result[:file] ) ).to be true
50
+ result[:paragraphs].each{ |paragraph| paragraph.include? term }
51
+ end
52
+ end
53
+
54
+ end
55
+
24
56
  end
25
57
 
26
58
  end
@@ -4,40 +4,38 @@ describe "Folder Search System" do
4
4
 
5
5
  describe "Search in folder tests" do
6
6
 
7
- 10.times do |number|
7
+ Settings.test_count.times do |number|
8
+
8
9
  it "Search TXT files test number #{number}" do
9
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
10
- files = SearchInFile.find_by_type_in( tmp_path, '.txt' )
10
+ tmp_path = Settings.fixtures_path
11
+ files = SearchInFile.find_by_type_in( tmp_path, '.txt' )
11
12
  files.each{ |f| File.extname(f) == '.txt' }
12
13
  files.count.should == 2
13
14
  end
14
- end
15
15
 
16
- 10.times do |number|
16
+
17
17
  it "Search PDF files test number #{number}" do
18
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
18
+ tmp_path = Settings.fixtures_path
19
19
  files = SearchInFile.find_by_type_in( tmp_path, '.pdf' )
20
20
  files.each{ |f| File.extname(f) == '.pdf' }
21
- files.count.should == 3
21
+ files.count.should == 4
22
22
  end
23
- end
24
-
25
- 10.times do |number|
23
+
26
24
  it "Search DOC files test number #{number}" do
27
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
28
- files = SearchInFile.find_by_type_in( tmp_path, '.doc' )
25
+ tmp_path = Settings.fixtures_path
26
+ files = SearchInFile.find_by_type_in( tmp_path, '.doc' )
29
27
  files.each{ |f| File.extname(f) == '.doc' }
30
28
  files.count.should == 3
31
- end
32
- end
29
+ end
30
+
33
31
 
34
- 10.times do |number|
35
32
  it "Search DOCX files test number #{number}" do
36
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
37
- files = SearchInFile.find_by_type_in( tmp_path, '.docx' )
33
+ tmp_path = Settings.fixtures_path
34
+ files = SearchInFile.find_by_type_in( tmp_path, '.docx' )
38
35
  files.each{ |f| File.extname(f) == '.docx' }
39
36
  files.count.should == 2
40
37
  end
38
+
41
39
  end
42
40
 
43
41
  end
@@ -4,36 +4,35 @@ describe "File Search System" do
4
4
 
5
5
  describe "Document Parsers Testing" do
6
6
 
7
- 10.times do |number|
7
+ Settings.test_count.times do |number|
8
+
8
9
  it "Read TXT files test number #{number}" do
9
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.txt'
10
+ tmp_path = Settings.fixtures_path + '/basic.txt'
10
11
  data_read = TxtParser.new.read_file(tmp_path)
11
12
  data_read.include? 'Often needs to retrieve documents on its'
12
13
  end
13
- end
14
14
 
15
- 10.times do |number|
15
+
16
16
  it "Read DOC files test number #{number}" do
17
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.doc'
17
+ tmp_path = Settings.fixtures_path + '/basic.doc'
18
18
  data_read = DocParser.new.read_file(tmp_path)
19
19
  data_read.include? 'Often needs to retrieve documents on its'
20
- end
21
- end
20
+ end if Gem.win_platform?
21
+
22
22
 
23
- 10.times do |number|
24
23
  it "Read DOCX files test number #{number}" do
25
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.docx'
24
+ tmp_path = Settings.fixtures_path + '/basic.docx'
26
25
  data_read = DocxParser.new.read_file(tmp_path)
27
26
  data_read.include? 'Often needs to retrieve documents on its'
28
27
  end
29
- end
30
28
 
31
- 10.times do |number|
29
+
32
30
  it "Read PDF files test number #{number}" do
33
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.pdf'
31
+ tmp_path = Settings.fixtures_path + '/basic.pdf'
34
32
  data_read = PdfParser.new.read_file(tmp_path)
35
33
  data_read.include? 'Often needs to retrieve documents on its'
36
34
  end
35
+
37
36
  end
38
37
 
39
38
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_in_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -91,7 +91,8 @@ dependencies:
91
91
  - - ! '>='
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0'
94
- description: Search files with phrase by directory path
94
+ description: Search word or phrase in .pdf,.txt,.doc and .docx files by directory
95
+ path
95
96
  email:
96
97
  - b.malets@gmail.com
97
98
  executables: []
@@ -108,6 +109,7 @@ files:
108
109
  - lib/file_parsers/pdf_parser.rb
109
110
  - lib/file_parsers/txt_parser.rb
110
111
  - lib/search_in_file.rb
112
+ - lib/search_in_file/settings.rb
111
113
  - lib/search_in_file/version.rb
112
114
  - search_in_file.gemspec
113
115
  - spec/fixtures/basic.doc