pdf_search 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5506277019033c54c84b0aeec3deae307b304af0392f17bd7e5f1147feb07ad8
4
+ data.tar.gz: d0488eefb43bdd4cbba1e9bfc0656eefd3e4ff41762b70d4f1e8dc0558fd73c3
5
+ SHA512:
6
+ metadata.gz: 8a20e3fffff896a967b395bae72483d99f733bf1bc1c949c3ac0f4426bd066550741c3b9e664ce71b633fb0b4e2f5c34f958123cb758deb181aaabbd5e1b8bd0
7
+ data.tar.gz: a2555c899aba3123f5f99b985590d75927515044111d8a7556fae92848b6a39dc335f6aa29aa1d6a3e0dada710e2e8e07e0599277240f783faff489c3b52c699
data/bin/pdf_search ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+ require 'pdfsearch'
3
+ require 'webrick'
4
+
5
# Launcher: starts Elasticsearch, then serves the gem's static html/ directory
# over HTTP with WEBrick until the process is interrupted.

# Ask the init system to start Elasticsearch; the command output is discarded.
# NOTE(review): assumes a `service` command and sudo rights are available.
`sudo service elasticsearch start`

# The gem root is one level above the directory holding this executable.
gem_root = File.expand_path('..', File.dirname(__FILE__))
html_dir = File.join(gem_root, 'html')

# NOTE(review): port 80 is usually privileged, so binding it typically needs root.
server = WEBrick::HTTPServer.new(:Port => 80, :DocumentRoot => html_dir)

# Without a handler, Ctrl-C aborts the process mid-request; shut down cleanly instead.
%w[INT TERM].each do |signal|
  trap(signal) { server.shutdown }
end

# Blocks until the server is shut down.
server.start
14
+
15
+
16
+ # PdfSearch::Indexer.startDeamon
17
+
18
+
data/lib/pdf_dir.rb ADDED
@@ -0,0 +1,33 @@
1
+ #encoding: UTF-8
2
+ require 'pdf-reader'
3
+ require 'pry'
4
+
5
module PdfSearch
  # Gives access to the PDF files located directly inside one directory
  # (sub-directories are not searched).
  class PdfDir
    # @param dir [String] directory to scan; defaults to the current working directory
    def initialize(dir = '.')
      @dir = dir
    end

    # Lists the PDF files directly inside the directory.
    #
    # @return [Array<String>] absolute paths matching `*.pdf`, sorted so the
    #   order does not depend on the file system or Ruby version
    def pdf_file_paths
      Dir.glob(File.join(File.expand_path(@dir), '*.pdf')).sort
    end

    # Iterates over the directory's PDFs, opening one PDF::Reader per file.
    # Readers are created lazily, one at a time, as the iteration advances.
    #
    # @yieldparam pdf_reader [PDF::Reader] reader for the next PDF file
    # @return [Enumerator] when no block is given
    # @return [self] when a block is given
    def each_pdf
      return enum_for(:each_pdf) unless block_given?

      pdf_file_paths.each do |pdf_file_path|
        yield PDF::Reader.new(pdf_file_path)
      end
      self
    end

    # Iterates over every page of every PDF in the directory, file by file.
    #
    # @yieldparam page [Object] each element of `PDF::Reader#pages`
    # @return [Enumerator] when no block is given
    # @return [self] when a block is given
    def each_page(&block)
      return enum_for(:each_page) unless block_given?

      # each_pdf must actually yield here; an ignored block would make this
      # method enumerate nothing.
      each_pdf do |pdf_reader|
        pdf_reader.pages.each(&block)
      end
      self
    end
  end
end
data/lib/pdf_index.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'pdfiterator'
2
+
3
# Appends every text yielded by PdfIterator#traverse_current_directory_pdf_texts
# to log/indexPdfs.log (relative to the current working directory), one entry
# per line.
# NOTE(review): presumably each yielded value is the extracted text of one PDF
# in the current directory - confirm against PdfIterator.
#
# The text is written with plain file I/O instead of being interpolated into a
# shell `echo` command: PDF content is untrusted, and quotes, `$(...)` or
# backticks inside it would otherwise be interpreted by the shell.
Dir.mkdir('log') unless Dir.exist?('log')

PdfIterator.new.traverse_current_directory_pdf_texts do |pdf_text|
  # Re-open in append mode for every entry so each text hits the file
  # immediately, as the per-entry shell redirect did.
  File.open('log/indexPdfs.log', 'a') do |log|
    log.write("#{pdf_text}\n")
  end
end
data/lib/pdf_search.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'pdf_index'
2
+ require 'pdf_dir'
3
+
4
+ module PdfSearch
5
+ end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf_search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Arno Korfmann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2010-04-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Run in any directory containing pdfs using `$ pdf_search`
14
+ email: manu@korfmann.info
15
+ executables:
16
+ - pdf_search
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/pdf_search
21
+ - lib/pdf_dir.rb
22
+ - lib/pdf_index.rb
23
+ - lib/pdf_search.rb
24
+ homepage: https://github.com/banalBI/pdfsearch
25
+ licenses:
26
+ - MIT
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.7.6
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: Searching pdfs by leveraging Elasticsearch
48
+ test_files: []