pdf_search 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/pdf_search +18 -0
- data/lib/pdf_dir.rb +33 -0
- data/lib/pdf_index.rb +5 -0
- data/lib/pdf_search.rb +5 -0
- metadata +48 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5506277019033c54c84b0aeec3deae307b304af0392f17bd7e5f1147feb07ad8
|
4
|
+
data.tar.gz: d0488eefb43bdd4cbba1e9bfc0656eefd3e4ff41762b70d4f1e8dc0558fd73c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8a20e3fffff896a967b395bae72483d99f733bf1bc1c949c3ac0f4426bd066550741c3b9e664ce71b633fb0b4e2f5c34f958123cb758deb181aaabbd5e1b8bd0
|
7
|
+
data.tar.gz: a2555c899aba3123f5f99b985590d75927515044111d8a7556fae92848b6a39dc335f6aa29aa1d6a3e0dada710e2e8e07e0599277240f783faff489c3b52c699
|
data/bin/pdf_search
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Launcher for the pdf_search gem: makes a best-effort attempt to start the
# local Elasticsearch service, then serves the gem's `html` directory over
# HTTP on port 80.
#
# The library entry point shipped with this gem is lib/pdf_search.rb, so it
# must be required as 'pdf_search' (the previous 'pdfsearch' does not match
# any file in the gem).
require 'pdf_search'
require 'webrick'

# Best effort: a failure to start the service is reported but does not stop
# the web server from coming up.
unless system('sudo', 'service', 'elasticsearch', 'start')
  warn 'pdf_search: could not start the elasticsearch service'
end

# Root of the installed gem (the parent of this bin/ directory).
gem_root = File.expand_path('..', File.dirname(__FILE__))
# NOTE(review): the gem's file list does not include an html directory --
# confirm that it is actually packaged.
html_dir = File.join(gem_root, 'html')

# Port 80 normally requires elevated privileges to bind.
server = WEBrick::HTTPServer.new(:Port => 80, :DocumentRoot => html_dir)

# Stop the server cleanly on Ctrl-C.
trap('INT') { server.shutdown }

# Blocks until the server is shut down.
server.start

# PdfSearch::Indexer.startDeamon
|
17
|
+
|
18
|
+
|
data/lib/pdf_dir.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#encoding: UTF-8
|
2
|
+
require 'pdf-reader'
|
3
|
+
require 'pry'
|
4
|
+
|
5
|
+
module PdfSearch
  # Gives access to the PDF files located directly inside one directory,
  # either as file paths, as PDF::Reader objects, or page by page.
  class PdfDir
    # @param dir [String] directory to scan; defaults to the current
    #   working directory
    def initialize(dir = '.')
      @dir = dir
    end

    # @return [Array<String>] absolute paths of the entries matching
    #   `*.pdf` directly inside the directory (no recursion)
    def pdf_file_paths
      Dir.glob(File.join(File.expand_path(@dir), '*.pdf'))
    end

    # Iterates over the directory's PDFs, building one PDF::Reader per file.
    #
    # @yieldparam pdf_reader [PDF::Reader] reader for the next PDF file
    # @return [Enumerator] when called without a block
    def each_pdf
      return enum_for(:each_pdf) unless block_given?

      pdf_file_paths.each do |pdf_file_path|
        yield PDF::Reader.new(pdf_file_path)
      end
    end

    # Iterates over every page of every PDF in the directory, file by file.
    #
    # Previously the block given to each_pdf was silently dropped (each_pdf
    # only returned an Enumerator), so no page was ever produced.
    #
    # @yieldparam page [Object] a page as returned by PDF::Reader#pages
    # @return [Enumerator] when called without a block
    def each_page(&block)
      return enum_for(:each_page) unless block_given?

      each_pdf do |pdf_reader|
        pdf_reader.pages.each(&block)
      end
    end
  end
end
|
data/lib/pdf_index.rb
ADDED
data/lib/pdf_search.rb
ADDED
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pdf_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Manuel Arno Korfmann
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2010-04-28 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Run in any directory containing pdfs using `$ pdf_search`
|
14
|
+
email: manu@korfmann.info
|
15
|
+
executables:
|
16
|
+
- pdf_search
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/pdf_search
|
21
|
+
- lib/pdf_dir.rb
|
22
|
+
- lib/pdf_index.rb
|
23
|
+
- lib/pdf_search.rb
|
24
|
+
homepage: https://github.com/banalBI/pdfsearch
|
25
|
+
licenses:
|
26
|
+
- MIT
|
27
|
+
metadata: {}
|
28
|
+
post_install_message:
|
29
|
+
rdoc_options: []
|
30
|
+
require_paths:
|
31
|
+
- lib
|
32
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 2.7.6
|
45
|
+
signing_key:
|
46
|
+
specification_version: 4
|
47
|
+
summary: Searching pdfs by leveraging Elasticsearch
|
48
|
+
test_files: []
|