search_in_file 1.0.0 → 1.0.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,29 +1,36 @@
1
1
  require "search_in_file/version"
2
- require 'find'
2
+ require "search_in_file/settings"
3
3
 
4
4
  require "file_parsers/txt_parser"
5
5
  require "file_parsers/pdf_parser"
6
- require "file_parsers/doc_parser"
6
+ require "file_parsers/doc_parser" if Gem.win_platform?
7
7
  require "file_parsers/docx_parser"
8
8
 
9
+ require "find"
10
+
9
11
  module SearchInFile
10
12
 
11
- # search files by phrase
12
- def self.search( dir_path, term )
13
+ def self.search( path, term )
14
+ is_document?( path ) ? search_in_file( path, term ) : search_in_directory( path, term )
15
+ end
16
+
17
+ def self.search_in_directory( path, term )
13
18
  results = []
14
- each_file_in( dir_path ) do |path|
15
- # read file
16
- parser_class = Object.const_get( "#{File.extname( path )[1..-1].capitalize}Parser" )
17
- file_content = parser_class.new.read_file( path )
18
- file_paragraphs = file_content.split(/\tor\n|\n/)
19
- # search for phrase
20
- paragraphs = []
21
- file_paragraphs.each{ |p| paragraphs << p if p.include?(term) }
22
- results << {file: path, paragraphs: paragraphs} if !paragraphs.empty?
19
+ each_file_in( path ) do |f_path|
20
+ f_result = search_in_file( f_path, term )
21
+ results = results + f_result if !f_result.empty?
23
22
  end
24
23
  results
25
24
  end
26
25
 
26
+ def self.search_in_file( f_path, term )
27
+ term_paragraphs = []
28
+ file_paragraphs = paragraphs_of( f_path )
29
+ # search for phrase
30
+ file_paragraphs.each{ |p| term_paragraphs << p if p.include?(term) }
31
+ term_paragraphs.empty? ? [] : [{file: f_path, paragraphs: term_paragraphs}]
32
+ end
33
+
27
34
  def self.content_of file
28
35
  class_name = "#{extname( file )[1..-1].capitalize}Parser"
29
36
  parser_class = Object.const_get( class_name )
@@ -31,7 +38,7 @@ module SearchInFile
31
38
  end
32
39
 
33
40
  def self.paragraphs_of file
34
- content_of( file ).split(/\tor\n|\n/)
41
+ content_of( file ).split(/\r/)
35
42
  end
36
43
 
37
44
  def self.find_by_type_in d_path, f_type
@@ -59,8 +66,7 @@ module SearchInFile
59
66
  end
60
67
 
61
68
  def self.is_document? f_name
62
- f_type = extname( f_name )
63
- ['.doc', '.docx', '.pdf', '.txt'].include? f_type
69
+ Settings.supported_docs.include?( extname(f_name) )
64
70
  end
65
71
 
66
72
  end
@@ -0,0 +1,17 @@
1
+ require 'rbconfig'
2
+
3
+ module Settings
4
+
5
+ def self.supported_docs
6
+ Gem.win_platform? ? ['.doc', '.docx', '.pdf', '.txt'] : ['.docx', '.pdf', '.txt']
7
+ end
8
+
9
+ def self.test_count
10
+ 1
11
+ end
12
+
13
+ def self.fixtures_path
14
+ File.expand_path('../../..', __FILE__) + '/spec/fixtures'
15
+ end
16
+
17
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchInFile
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["bmalets"]
10
10
  spec.email = ["b.malets@gmail.com"]
11
11
  spec.summary = %q{Search files with phrase by directory path}
12
- spec.description = %q{Search files with phrase by directory path}
12
+ spec.description = %q{Search word or phrase in .pdf,.txt,.doc and .docx files by directory path}
13
13
  spec.homepage = "https://github.com/bmalets/search_in_file/blob/master/README.md"
14
14
  spec.license = "MIT"
15
15
 
@@ -4,23 +4,55 @@ describe "File Search System" do
4
4
 
5
5
  describe "Search engine tests" do
6
6
 
7
- 10.times do |number|
8
- it "Search phrase in directory number #{number}" do
7
+ Settings.test_count.times do |number|
9
8
 
10
- dir_path = File.expand_path('../..', __FILE__) + '/fixtures'
9
+ it "Search phrase in directory or in file number #{number}" do
10
+ dir_path = Settings.fixtures_path
11
11
  term = 'Often needs to retrieve documents on its'
12
12
  results = SearchInFile.search( dir_path, term )
13
13
 
14
- results.class == 'Array'
15
- results.count == 9
14
+ expect(results.class).to eq(Array)
15
+ expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
16
+
17
+ results.each do |result|
18
+ Settings.supported_docs.include? File.extname(result[:file])
19
+ File.exist?( result[:file] ).should be(true)
20
+ result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
21
+ end
22
+ end
23
+
24
+ it "Search phrase in directory number #{number}" do
25
+ dir_path = Settings.fixtures_path
26
+ term = 'Often needs to retrieve documents on its'
27
+ results = SearchInFile.search_in_directory( dir_path, term )
28
+
29
+ expect(results.class).to eq(Array)
30
+ expect(results.count).to eq( Gem.win_platform? ? 7 : 4)
16
31
 
17
- results.each do |result|
18
- ['.doc', '.docx', '.pdf', '.txt'].include? File.extname(result[:file])
19
- File.exist?( result[:file] ).should == true
20
-
21
- result[:paragraphs].each{ |paragraph| paragraph.include? term }
32
+ results.each do |result|
33
+ Settings.supported_docs.include? File.extname(result[:file])
34
+ File.exist?( result[:file] ).should be(true)
35
+ result[:paragraphs].each{ |paragraph| expect( paragraph.include?(term) ).to be true }
22
36
  end
23
37
  end
38
+
39
+ Settings.supported_docs.each do |f_type|
40
+
41
+ it "Search phrase in #{f_type} file number #{number}" do
42
+ f_path = Settings.fixtures_path + "/basic#{f_type}"
43
+ term = 'Often needs to retrieve documents on its'
44
+ results = SearchInFile.search_in_file( f_path, term )
45
+
46
+ expect(results.class).to eq(Array)
47
+ results.each do |result|
48
+ Settings.supported_docs.include? File.extname(result[:file])
49
+ expect( File.exist?( result[:file] ) ).to be true
50
+ result[:paragraphs].each{ |paragraph| paragraph.include? term }
51
+ end
52
+ end
53
+
54
+ end
55
+
24
56
  end
25
57
 
26
58
  end
@@ -4,40 +4,38 @@ describe "Folder Search System" do
4
4
 
5
5
  describe "Search in folder tests" do
6
6
 
7
- 10.times do |number|
7
+ Settings.test_count.times do |number|
8
+
8
9
  it "Search TXT files test number #{number}" do
9
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
10
- files = SearchInFile.find_by_type_in( tmp_path, '.txt' )
10
+ tmp_path = Settings.fixtures_path
11
+ files = SearchInFile.find_by_type_in( tmp_path, '.txt' )
11
12
  files.each{ |f| File.extname(f) == '.txt' }
12
13
  files.count.should == 2
13
14
  end
14
- end
15
15
 
16
- 10.times do |number|
16
+
17
17
  it "Search PDF files test number #{number}" do
18
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
18
+ tmp_path = Settings.fixtures_path
19
19
  files = SearchInFile.find_by_type_in( tmp_path, '.pdf' )
20
20
  files.each{ |f| File.extname(f) == '.pdf' }
21
- files.count.should == 3
21
+ files.count.should == 4
22
22
  end
23
- end
24
-
25
- 10.times do |number|
23
+
26
24
  it "Search DOC files test number #{number}" do
27
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
28
- files = SearchInFile.find_by_type_in( tmp_path, '.doc' )
25
+ tmp_path = Settings.fixtures_path
26
+ files = SearchInFile.find_by_type_in( tmp_path, '.doc' )
29
27
  files.each{ |f| File.extname(f) == '.doc' }
30
28
  files.count.should == 3
31
- end
32
- end
29
+ end
30
+
33
31
 
34
- 10.times do |number|
35
32
  it "Search DOCX files test number #{number}" do
36
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures'
37
- files = SearchInFile.find_by_type_in( tmp_path, '.docx' )
33
+ tmp_path = Settings.fixtures_path
34
+ files = SearchInFile.find_by_type_in( tmp_path, '.docx' )
38
35
  files.each{ |f| File.extname(f) == '.docx' }
39
36
  files.count.should == 2
40
37
  end
38
+
41
39
  end
42
40
 
43
41
  end
@@ -4,36 +4,35 @@ describe "File Search System" do
4
4
 
5
5
  describe "Document Parsers Testing" do
6
6
 
7
- 10.times do |number|
7
+ Settings.test_count.times do |number|
8
+
8
9
  it "Read TXT files test number #{number}" do
9
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.txt'
10
+ tmp_path = Settings.fixtures_path + '/basic.txt'
10
11
  data_read = TxtParser.new.read_file(tmp_path)
11
12
  data_read.include? 'Often needs to retrieve documents on its'
12
13
  end
13
- end
14
14
 
15
- 10.times do |number|
15
+
16
16
  it "Read DOC files test number #{number}" do
17
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.doc'
17
+ tmp_path = Settings.fixtures_path + '/basic.doc'
18
18
  data_read = DocParser.new.read_file(tmp_path)
19
19
  data_read.include? 'Often needs to retrieve documents on its'
20
- end
21
- end
20
+ end if Gem.win_platform?
21
+
22
22
 
23
- 10.times do |number|
24
23
  it "Read DOCX files test number #{number}" do
25
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.docx'
24
+ tmp_path = Settings.fixtures_path + '/basic.docx'
26
25
  data_read = DocxParser.new.read_file(tmp_path)
27
26
  data_read.include? 'Often needs to retrieve documents on its'
28
27
  end
29
- end
30
28
 
31
- 10.times do |number|
29
+
32
30
  it "Read PDF files test number #{number}" do
33
- tmp_path = File.expand_path('../..', __FILE__) + '/fixtures/basic.pdf'
31
+ tmp_path = Settings.fixtures_path + '/basic.pdf'
34
32
  data_read = PdfParser.new.read_file(tmp_path)
35
33
  data_read.include? 'Often needs to retrieve documents on its'
36
34
  end
35
+
37
36
  end
38
37
 
39
38
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_in_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -91,7 +91,8 @@ dependencies:
91
91
  - - ! '>='
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0'
94
- description: Search files with phrase by directory path
94
+ description: Search word or phrase in .pdf,.txt,.doc and .docx files by directory
95
+ path
95
96
  email:
96
97
  - b.malets@gmail.com
97
98
  executables: []
@@ -108,6 +109,7 @@ files:
108
109
  - lib/file_parsers/pdf_parser.rb
109
110
  - lib/file_parsers/txt_parser.rb
110
111
  - lib/search_in_file.rb
112
+ - lib/search_in_file/settings.rb
111
113
  - lib/search_in_file/version.rb
112
114
  - search_in_file.gemspec
113
115
  - spec/fixtures/basic.doc