pdf_search 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/pdf_search +18 -0
- data/lib/pdf_dir.rb +33 -0
- data/lib/pdf_index.rb +5 -0
- data/lib/pdf_search.rb +5 -0
- metadata +48 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5506277019033c54c84b0aeec3deae307b304af0392f17bd7e5f1147feb07ad8
|
4
|
+
data.tar.gz: d0488eefb43bdd4cbba1e9bfc0656eefd3e4ff41762b70d4f1e8dc0558fd73c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8a20e3fffff896a967b395bae72483d99f733bf1bc1c949c3ac0f4426bd066550741c3b9e664ce71b633fb0b4e2f5c34f958123cb758deb181aaabbd5e1b8bd0
|
7
|
+
data.tar.gz: a2555c899aba3123f5f99b985590d75927515044111d8a7556fae92848b6a39dc335f6aa29aa1d6a3e0dada710e2e8e07e0599277240f783faff489c3b52c699
|
data/bin/pdf_search
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point of the pdf_search gem.
#
# Tries to start the local Elasticsearch service, then serves the gem's
# `html` directory over HTTP on port 80 until the process is interrupted.

# The gem ships lib/pdf_search.rb, so the library is required under that name.
require 'pdf_search'
require 'webrick'

# Best-effort: a failure to start Elasticsearch is reported but does not stop
# the web server from coming up (the previous backtick call ignored failures
# silently).
unless system('sudo', 'service', 'elasticsearch', 'start')
  warn 'pdf_search: could not start the elasticsearch service'
end

# Resolve paths relative to this executable, i.e. <gem root>/bin/pdf_search.
gem_dir = File.expand_path(File.join(File.dirname(__FILE__), '..'))
html_dir = File.join(gem_dir, 'html')

server = WEBrick::HTTPServer.new(:Port => 80, :DocumentRoot => html_dir)

# Shut the server down cleanly on Ctrl-C / kill instead of dying mid-request.
%w[INT TERM].each do |signal|
  trap(signal) { server.shutdown }
end

# Blocks until the server is shut down.
server.start

# NOTE(review): left disabled as in the original; it would only run after the
# server above has stopped.
# PdfSearch::Indexer.startDeamon
|
17
|
+
|
18
|
+
|
data/lib/pdf_dir.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#encoding: UTF-8
|
2
|
+
require 'pdf-reader'
|
3
|
+
require 'pry'
|
4
|
+
|
5
|
+
module PdfSearch
  # Enumerates the PDF files that sit directly inside one directory, and the
  # pages of those files.
  class PdfDir
    # @param dir [String] directory to scan; defaults to the current directory.
    def initialize(dir = '.')
      @dir = dir
    end

    # Absolute paths of the files matching `*.pdf` directly inside the
    # directory (no recursion into sub-directories).
    #
    # The list is sorted so that iteration order does not depend on the
    # platform or Ruby version.
    #
    # @return [Array<String>]
    def pdf_file_paths
      Dir.glob(File.join(File.expand_path(@dir), '*.pdf')).sort
    end

    # Iterates over one PDF::Reader per PDF file in the directory.
    #
    # @yieldparam pdf_reader [PDF::Reader] reader opened on one PDF file
    # @return [Enumerator] when no block is given
    def each_pdf
      return enum_for(:each_pdf) unless block_given?

      pdf_file_paths.each do |pdf_file_path|
        yield PDF::Reader.new(pdf_file_path)
      end
    end

    # Iterates over every page of every PDF file in the directory, file by
    # file.
    #
    # Previously the block handed to `each_pdf` here was silently dropped, so
    # no page was ever produced; `each_pdf` now yields to its block.
    #
    # @yieldparam page [Object] one element of `PDF::Reader#pages`
    # @return [Enumerator] when no block is given
    def each_page(&block)
      return enum_for(:each_page) unless block_given?

      each_pdf do |pdf_reader|
        pdf_reader.pages.each(&block)
      end
    end
  end
end
|
data/lib/pdf_index.rb
ADDED
data/lib/pdf_search.rb
ADDED
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pdf_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Manuel Arno Korfmann
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2010-04-28 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Run in any directory containing pdfs using `$ pdf_search`
|
14
|
+
email: manu@korfmann.info
|
15
|
+
executables:
|
16
|
+
- pdf_search
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/pdf_search
|
21
|
+
- lib/pdf_dir.rb
|
22
|
+
- lib/pdf_index.rb
|
23
|
+
- lib/pdf_search.rb
|
24
|
+
homepage: https://github.com/banalBI/pdfsearch
|
25
|
+
licenses:
|
26
|
+
- MIT
|
27
|
+
metadata: {}
|
28
|
+
post_install_message:
|
29
|
+
rdoc_options: []
|
30
|
+
require_paths:
|
31
|
+
- lib
|
32
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 2.7.6
|
45
|
+
signing_key:
|
46
|
+
specification_version: 4
|
47
|
+
summary: Searching pdfs by leveraging Elasticsearch
|
48
|
+
test_files: []
|