pdf_search 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/elastic_search_query.rb +8 -6
- data/lib/pdf_dir.rb +6 -6
- data/lib/pdf_index.rb +15 -13
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffe0b0026bb6eeb3c77ee7a63929f5c3198f7532f3664f13d5b37c3782767c94
|
4
|
+
data.tar.gz: acc3deac1e4c5dbe585399783ebe8c769c0c1eaf6277cb3c6ef96a27578e6581
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6edf518ba0c6c7d98a9c29539f34c94d5c6785221f3427f24c01817c9bde3f192c3d617fb37efa6f025896481c6fb8f1199768d750798f762a5bbd425dc08e05
|
7
|
+
data.tar.gz: '091a61159f22d048caaf9aa9758fb75d0a612525a00ff48beb12cbd2c2880459e31e339e7315ac673322c4627b8b8a99dc6a28e2cbaaf9eb158cf5e708b422bf'
|
data/lib/elastic_search_query.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'elasticsearch/dsl'
|
2
|
+
require 'pry-remote'
|
2
3
|
module PdfSearch
|
3
4
|
class ElasticSearchQuery
|
4
5
|
include Elasticsearch::DSL
|
5
|
-
attr_reader :
|
6
|
+
attr_reader :client_query_specification, :search_index
|
6
7
|
|
7
|
-
def initialize(
|
8
|
+
def initialize(client_query_specification, search_index)
|
8
9
|
@search_index = search_index
|
9
|
-
@
|
10
|
+
@client_query_specification = client_query_specification
|
10
11
|
end
|
11
12
|
|
12
13
|
def to_hash
|
@@ -16,7 +17,7 @@ module PdfSearch
|
|
16
17
|
"must": [
|
17
18
|
{
|
18
19
|
"match": {
|
19
|
-
"text":
|
20
|
+
"text": client_query_specification['search']
|
20
21
|
}
|
21
22
|
}
|
22
23
|
].concat(range_queries)
|
@@ -26,12 +27,13 @@ module PdfSearch
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def range_queries
|
30
|
+
return [] if search_index.search_input_fields_by_type.nil?
|
29
31
|
search_index.search_input_fields_by_type[:interval].map do |name|
|
30
32
|
{
|
31
33
|
"range": {
|
32
34
|
name => {
|
33
|
-
gte:
|
34
|
-
lte:
|
35
|
+
gte: client_query_specification["search_#{name}_start"],
|
36
|
+
lte: client_query_specification["search_#{name}_end"]
|
35
37
|
}
|
36
38
|
}
|
37
39
|
}
|
data/lib/pdf_dir.rb
CHANGED
@@ -8,15 +8,15 @@ module PdfSearch
|
|
8
8
|
@dir = dir
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
def pdf_file_paths
|
12
|
+
Dir.glob(File.join(File.expand_path(@dir), '*.pdf'))
|
13
|
+
end
|
14
14
|
|
15
15
|
def pdf_documents
|
16
16
|
Enumerator.new do |e|
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
pdf_file_paths.each do |pdf_file_path|
|
18
|
+
e << PDF::Reader.new(pdf_file_path)
|
19
|
+
end
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
data/lib/pdf_index.rb
CHANGED
@@ -39,13 +39,15 @@ class PdfSearch::PdfIndex
|
|
39
39
|
@properties ||= {}
|
40
40
|
@properties[property_name] = {type: type}
|
41
41
|
|
42
|
-
@search_input_fields ||= {}
|
43
|
-
@search_input_fields_by_type ||= {}
|
44
|
-
|
45
42
|
search_input_type = options.delete(:search)
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
|
44
|
+
if search_input_type
|
45
|
+
@search_input_fields ||= {}
|
46
|
+
@search_input_fields[property_name] = search_input_type
|
47
|
+
|
48
|
+
@search_input_fields_by_type ||= Hash.new { |h,k| h[k] = [] }
|
49
|
+
@search_input_fields_by_type[search_input_type].push(property_name)
|
50
|
+
end
|
49
51
|
end
|
50
52
|
|
51
53
|
def search_input_fields_by_type
|
@@ -113,13 +115,13 @@ class PdfSearch::PdfIndex
|
|
113
115
|
|
114
116
|
def create_page_document(pdf_id, text, additional_data)
|
115
117
|
@els_client.create(
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
118
|
+
index: 'pdf_pages',
|
119
|
+
type: 'document',
|
120
|
+
id: combined_pdf_page_id(pdf_id, text),
|
121
|
+
body: {
|
122
|
+
text: text
|
123
|
+
}.merge(additional_data)
|
124
|
+
)
|
123
125
|
end
|
124
126
|
|
125
127
|
def combined_pdf_page_id(pdf_id, text)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Arno Korfmann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pdf-reader
|