pdf_search 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5506277019033c54c84b0aeec3deae307b304af0392f17bd7e5f1147feb07ad8
4
+ data.tar.gz: d0488eefb43bdd4cbba1e9bfc0656eefd3e4ff41762b70d4f1e8dc0558fd73c3
5
+ SHA512:
6
+ metadata.gz: 8a20e3fffff896a967b395bae72483d99f733bf1bc1c949c3ac0f4426bd066550741c3b9e664ce71b633fb0b4e2f5c34f958123cb758deb181aaabbd5e1b8bd0
7
+ data.tar.gz: a2555c899aba3123f5f99b985590d75927515044111d8a7556fae92848b6a39dc335f6aa29aa1d6a3e0dada710e2e8e07e0599277240f783faff489c3b52c699
data/bin/pdf_search ADDED
@@ -0,0 +1,18 @@
1
#!/usr/bin/env ruby
# Launcher for pdf_search: tries to start the Elasticsearch service, then
# serves the gem's `html` directory over HTTP on port 80 until interrupted.

# The gem ships lib/pdf_search.rb, so that is the name that can be required.
require 'pdf_search'
require 'webrick'

# Best-effort service start. `system` with separate arguments bypasses the
# shell and reports failure, which the earlier backtick call silently dropped.
unless system('sudo', 'service', 'elasticsearch', 'start')
  warn 'pdf_search: could not start the elasticsearch service; continuing anyway'
end

# Static assets are looked up in <gem root>/html, one level above bin/.
html_dir = File.expand_path(File.join('..', 'html'), File.dirname(__FILE__))
warn "pdf_search: document root #{html_dir} does not exist" unless Dir.exist?(html_dir)

server = WEBrick::HTTPServer.new(Port: 80, DocumentRoot: html_dir)

# `start` blocks, so install handlers first to allow a clean shutdown on
# Ctrl-C or a TERM signal instead of an abrupt exit.
%w[INT TERM].each do |signal|
  trap(signal) { server.shutdown }
end

server.start

# TODO: indexing daemon is not wired up yet.
# PdfSearch::Indexer.startDeamon
data/lib/pdf_dir.rb ADDED
@@ -0,0 +1,33 @@
1
+ #encoding: UTF-8
2
+ require 'pdf-reader'
3
+ require 'pry'
4
+
5
module PdfSearch
  # Gives access to the PDF files that sit directly inside one directory
  # (no recursion), as paths, as PDF::Reader objects, or page by page.
  class PdfDir
    # @param dir [String] directory to scan; defaults to the current directory
    def initialize(dir = '.')
      @dir = dir
    end

    # Lists the PDF files in the directory.
    #
    # @return [Array<String>] absolute paths matching `*.pdf`, sorted so the
    #   order does not depend on the filesystem or Ruby version
    def pdf_file_paths
      Dir.glob(File.join(File.expand_path(@dir), '*.pdf')).sort
    end

    # Iterates over one PDF::Reader per PDF file. Readers are built lazily,
    # one at a time, as the iteration advances.
    #
    # @yieldparam pdf_reader [PDF::Reader]
    # @return [Enumerator] when called without a block
    # @return [self] when called with a block
    def each_pdf
      return enum_for(:each_pdf) unless block_given?

      pdf_file_paths.each do |pdf_file_path|
        yield PDF::Reader.new(pdf_file_path)
      end
      self
    end

    # Iterates over every page of every PDF, in file order then page order.
    #
    # Previously the block handed to each_pdf was never invoked (each_pdf
    # only returned an Enumerator), so no page was ever produced.
    #
    # @yieldparam page [Object] an element of the reader's `pages`
    # @return [Enumerator] when called without a block
    # @return [self] when called with a block
    def each_page
      return enum_for(:each_page) unless block_given?

      each_pdf do |pdf_reader|
        pdf_reader.pages.each { |page| yield page }
      end
      self
    end
  end
end
data/lib/pdf_index.rb ADDED
@@ -0,0 +1,5 @@
1
require 'pdfiterator'

# Appends each text yielded by PdfIterator to log/indexPdfs.log, one entry
# per line. PdfIterator is loaded from 'pdfiterator' and is not defined in
# this file; presumably it yields the extracted text of each PDF in the
# current directory -- verify against its implementation.
#
# The text is written through File rather than interpolated into a shell
# `echo`, because PDF content containing quotes, `$` or backticks would
# otherwise corrupt the command or execute arbitrary shell code.
log_dir = 'log'
log_path = File.join(log_dir, 'indexPdfs.log')

# The shell redirect used before failed quietly when log/ was missing.
Dir.mkdir(log_dir) unless Dir.exist?(log_dir)

File.open(log_path, 'a') do |log|
  PdfIterator.new.traverse_current_directory_pdf_texts do |pdf_text|
    # Always terminate the entry with a newline, as `echo` did.
    log << pdf_text.to_s << "\n"
  end
end
data/lib/pdf_search.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'pdf_index'
2
+ require 'pdf_dir'
3
+
4
# Root namespace of the gem. It is intentionally empty here; this file only
# opens the module so the constant exists once the library has been required.
module PdfSearch; end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf_search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Arno Korfmann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2010-04-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Run in any directory containing pdfs using `$ pdf_search`
14
+ email: manu@korfmann.info
15
+ executables:
16
+ - pdf_search
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/pdf_search
21
+ - lib/pdf_dir.rb
22
+ - lib/pdf_index.rb
23
+ - lib/pdf_search.rb
24
+ homepage: https://github.com/banalBI/pdfsearch
25
+ licenses:
26
+ - MIT
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.7.6
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: Searching pdfs by leveraging Elasticsearch
48
+ test_files: []