neon_documents 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +90 -0
- data/README.md +18 -0
- data/Rakefile +6 -0
- data/lib/neon_documents.rb +10 -0
- data/lib/neon_documents/consumer.rb +30 -0
- data/lib/neon_documents/document_processor.rb +46 -0
- data/lib/neon_documents/document_templates/webpage_screenshot.rb +11 -0
- data/lib/neon_documents/version.rb +3 -0
- data/neon_documents.gemspec +25 -0
- metadata +59 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ac10ee5c8b168a8c0962a03575afa4f2354eb2cc841f177ef406d52b4b13308d
|
4
|
+
data.tar.gz: 794195193edefa7b7203a819fe88c0cdad46de2cbab605c26f558df54e4d38ab
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4344aaaa9f767494b1b13b4086e4dfdf70a8639cf01a6ab6e5c6762258ec6049c79160ee828794d7fa598cab1de4b911d2edc57387ade28084f544220894ac83
|
7
|
+
data.tar.gz: df0996a61516f0f9a5df3275e410a30e38294f853de80470efecae51d3d898d9b7f10cf4bede3d0e8cd23a25557a6c0cf44144007bbd8cb16fe4168a9399a581
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
source "https://rubygems.org"
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in neon_documents.gemspec
|
4
|
+
gemspec
|
5
|
+
|
6
|
+
gem "rake", "~> 12.0"
|
7
|
+
gem "rspec", "~> 3.0"
|
8
|
+
gem "standard"
|
9
|
+
|
10
|
+
gem "sequel", "~> 5.43"
|
11
|
+
gem "fixture_dependencies", "~> 1.10"
|
12
|
+
gem "pg", "~> 1.2"
|
13
|
+
|
14
|
+
gem "pry", "~> 0.14.1"
|
15
|
+
|
16
|
+
gem "faker", "~> 2.17"
|
17
|
+
|
18
|
+
gem "ruby-kafka", "~> 1.3"
|
19
|
+
|
20
|
+
gem "sendgrid-ruby", "~> 6.4"
|
21
|
+
|
22
|
+
gem "neon_postgres", "~> 0.0.1"
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
neon_documents (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.2)
|
10
|
+
coderay (1.1.3)
|
11
|
+
concurrent-ruby (1.1.8)
|
12
|
+
diff-lcs (1.4.4)
|
13
|
+
digest-crc (0.6.3)
|
14
|
+
rake (>= 12.0.0, < 14.0.0)
|
15
|
+
faker (2.17.0)
|
16
|
+
i18n (>= 1.6, < 2)
|
17
|
+
fixture_dependencies (1.10.0)
|
18
|
+
i18n (1.8.10)
|
19
|
+
concurrent-ruby (~> 1.0)
|
20
|
+
method_source (1.0.0)
|
21
|
+
neon_postgres (0.0.1)
|
22
|
+
parallel (1.20.1)
|
23
|
+
parser (3.0.1.0)
|
24
|
+
ast (~> 2.4.1)
|
25
|
+
pg (1.2.3)
|
26
|
+
pry (0.14.1)
|
27
|
+
coderay (~> 1.1)
|
28
|
+
method_source (~> 1.0)
|
29
|
+
rainbow (3.0.0)
|
30
|
+
rake (12.3.3)
|
31
|
+
regexp_parser (2.1.1)
|
32
|
+
rexml (3.2.5)
|
33
|
+
rspec (3.10.0)
|
34
|
+
rspec-core (~> 3.10.0)
|
35
|
+
rspec-expectations (~> 3.10.0)
|
36
|
+
rspec-mocks (~> 3.10.0)
|
37
|
+
rspec-core (3.10.1)
|
38
|
+
rspec-support (~> 3.10.0)
|
39
|
+
rspec-expectations (3.10.1)
|
40
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
41
|
+
rspec-support (~> 3.10.0)
|
42
|
+
rspec-mocks (3.10.2)
|
43
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
44
|
+
rspec-support (~> 3.10.0)
|
45
|
+
rspec-support (3.10.2)
|
46
|
+
rubocop (1.12.1)
|
47
|
+
parallel (~> 1.10)
|
48
|
+
parser (>= 3.0.0.0)
|
49
|
+
rainbow (>= 2.2.2, < 4.0)
|
50
|
+
regexp_parser (>= 1.8, < 3.0)
|
51
|
+
rexml
|
52
|
+
rubocop-ast (>= 1.2.0, < 2.0)
|
53
|
+
ruby-progressbar (~> 1.7)
|
54
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
55
|
+
rubocop-ast (1.4.1)
|
56
|
+
parser (>= 2.7.1.5)
|
57
|
+
rubocop-performance (1.10.1)
|
58
|
+
rubocop (>= 0.90.0, < 2.0)
|
59
|
+
rubocop-ast (>= 0.4.0)
|
60
|
+
ruby-kafka (1.3.0)
|
61
|
+
digest-crc
|
62
|
+
ruby-progressbar (1.11.0)
|
63
|
+
ruby_http_client (3.5.2)
|
64
|
+
sendgrid-ruby (6.4.0)
|
65
|
+
ruby_http_client (~> 3.4)
|
66
|
+
sequel (5.43.0)
|
67
|
+
standard (1.0.5)
|
68
|
+
rubocop (= 1.12.1)
|
69
|
+
rubocop-performance (= 1.10.1)
|
70
|
+
unicode-display_width (2.0.0)
|
71
|
+
|
72
|
+
PLATFORMS
|
73
|
+
ruby
|
74
|
+
|
75
|
+
DEPENDENCIES
|
76
|
+
faker (~> 2.17)
|
77
|
+
fixture_dependencies (~> 1.10)
|
78
|
+
neon_documents!
|
79
|
+
neon_postgres (~> 0.0.1)
|
80
|
+
pg (~> 1.2)
|
81
|
+
pry (~> 0.14.1)
|
82
|
+
rake (~> 12.0)
|
83
|
+
rspec (~> 3.0)
|
84
|
+
ruby-kafka (~> 1.3)
|
85
|
+
sendgrid-ruby (~> 6.4)
|
86
|
+
sequel (~> 5.43)
|
87
|
+
standard
|
88
|
+
|
89
|
+
BUNDLED WITH
|
90
|
+
2.1.4
|
data/README.md
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Neon Documents
|
2
|
+
|
3
|
+
This gem is used to listen to document messages in Kafka and then interact with
|
4
|
+
the GCP Storage API.
|
5
|
+
|
6
|
+
## Necessary API Keys and environment variables.
|
7
|
+
|
8
|
+
* Kafka URL (`KAFKA_URL`) - Where to consume document messages from
|
9
|
+
* Postgres URL - The PostgreSQL URL for connecting
|
10
|
+
* GCP Credentials - A GCP Key JSON for a service account with access to the
|
11
|
+
UNPROCESSED_DOCUMENTS_BUCKET and the PRIVATE_DOCUMENT_BUCKET
|
12
|
+
* UNPROCESSED_DOCUMENTS_BUCKET - the GCP bucket name for unprocessed documents
|
13
|
+
* PRIVATE_DOCUMENT_BUCKET - the GCP bucket for processed, private documents
|
14
|
+
|
15
|
+
## Contributing
|
16
|
+
|
17
|
+
Bug reports and pull requests are welcome on GitHub at
|
18
|
+
https://github.com/neonlaw/codebase.
|
data/Rakefile
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "neon_documents/version"
|
2
|
+
require "neon_documents/consumer"
|
3
|
+
require "neon_documents/document_processor"
|
4
|
+
Dir["#{__dir__}/neon_documents/document_templates/*"].sort.each { |file|
|
5
|
+
require file
|
6
|
+
}
|
7
|
+
|
8
|
+
# Top-level namespace for the neon_documents gem.
module NeonDocuments
  # Gem-specific error class; a direct subclass of StandardError.
  Error = Class.new(StandardError)
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require "kafka"
|
2
|
+
|
3
|
+
module NeonDocuments
  # Kafka consumer that forwards "process_document" messages to
  # NeonDocuments::DocumentProcessor.
  class Consumer
    # Connects to Kafka, joins the "documents" consumer group, subscribes to
    # the "process_document" topic and hands every received message value to
    # DocumentProcessor.encode_and_store_document as the unprocessed document id.
    #
    # The broker address is read from the KAFKA_URL environment variable and
    # falls back to "kafka:9092" when it is not set.
    #
    # Installs a TERM signal handler that stops the consumer. Returns whatever
    # `each_message` returns once the consumer stops.
    def self.consume_messages
      kafka = Kafka.new([ENV.fetch("KAFKA_URL") { "kafka:9092" }])

      # Consumers with the same group id will form a Consumer Group together.
      consumer = kafka.consumer(group_id: "documents")

      # It's possible to subscribe to multiple topics by calling `subscribe`
      # repeatedly.
      consumer.subscribe("process_document")

      # Stop the consumer when the SIGTERM signal is sent to the process.
      # It's better to shut down gracefully than to kill the process.
      trap("TERM") { consumer.stop }

      # This will loop indefinitely, yielding each message in turn.
      consumer.each_message do |message|
        case message.topic
        when "process_document"
          # The namespace is NeonDocuments (plural); the singular spelling
          # raised NameError as soon as the first message arrived.
          NeonDocuments::DocumentProcessor.encode_and_store_document(
            unprocessed_document_id: message.value
          )
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require "sendgrid-ruby"
|
2
|
+
|
3
|
+
module NeonDocuments
  # Looks up an unprocessed document and its document template in the database
  # and derives the processed filename for it.
  #
  # NOTE(review): assumes NeonPostgres::Database.connection is a
  # Sequel::Database, so `connection[:table]` is a dataset and rows are Hashes
  # with symbol keys - confirm against neon_postgres.
  class DocumentProcessor
    # Convenience entry point: builds a processor for the given id and runs it.
    #
    # @param unprocessed_document_id the id of the row in `unprocessed_documents`
    # @return [nil]
    def self.encode_and_store_document(unprocessed_document_id:)
      new(
        unprocessed_document_id: unprocessed_document_id
      ).encode_and_store_document
    end

    # @param unprocessed_document_id the id of the row in `unprocessed_documents`
    def initialize(unprocessed_document_id:)
      # Previously assigned from a misspelled, undefined local, which raised
      # NameError on every instantiation.
      @unprocessed_document_id = unprocessed_document_id
    end

    # Resolves the processed filename from the document's template and prints
    # it. Templates other than "Webpage Screenshot" yield a nil filename.
    #
    # @return [nil] the result of `puts`
    def encode_and_store_document
      # Rows are read with `fetch`, matching how `document_template_id` is
      # read below.
      filename = case document_template.fetch(:name)
      when "Webpage Screenshot"
        NeonDocuments::DocumentTemplates::WebpageScreenshot.processed_filename(
          unprocessed_filename: unprocessed_document.fetch(:filename)
        )
      end

      puts filename
    end

    private

    attr_reader :unprocessed_document_id

    # Memoized database handle.
    def connection
      @_connection ||= NeonPostgres::Database.connection
    end

    # Memoized `unprocessed_documents` row for this processor's id.
    def unprocessed_document
      # `first(conditions)` returns the matching row; `find` on a dataset is
      # Enumerable#find, which without a block returns an Enumerator.
      @_unprocessed_document ||= connection[:unprocessed_documents].first(
        id: unprocessed_document_id
      )
    end

    # Memoized `document_templates` row referenced by the unprocessed document.
    def document_template
      @_document_template ||= connection[:document_templates].first(
        id: unprocessed_document.fetch(:document_template_id)
      )
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative "lib/neon_documents/version"
|
2
|
+
|
3
|
+
# Gem specification for neon_documents.
Gem::Specification.new do |spec|
  spec.name = "neon_documents"
  spec.version = NeonDocuments::VERSION
  spec.authors = ["Neon Law"]
  spec.email = ["support@neonlaw.com"]

  spec.summary = "A gem to process files."
  # Describes this gem (per its README) rather than the neon_postgres text
  # that had been copied here.
  spec.description = "Listens to document messages in Kafka and then " \
    "interacts with the GCP Storage API."
  spec.homepage = "https://github.com/neonlaw/codebase"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.require_paths = ["lib"]

  # Runtime dependencies: the library requires "kafka" and "sendgrid-ruby" at
  # load time and uses NeonPostgres::Database, so an installed gem cannot load
  # without them. Version constraints mirror the Gemfile.
  spec.add_dependency "neon_postgres", "~> 0.0.1"
  spec.add_dependency "ruby-kafka", "~> 1.3"
  spec.add_dependency "sendgrid-ruby", "~> 6.4"
end
|
metadata
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: neon_documents
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Neon Law
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-05-10 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: |-
|
14
|
+
This gem contains tests and sequel setup to speak to
|
15
|
+
the Neon Law database
|
16
|
+
email:
|
17
|
+
- support@neonlaw.com
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- ".gitignore"
|
23
|
+
- ".rspec"
|
24
|
+
- Gemfile
|
25
|
+
- Gemfile.lock
|
26
|
+
- README.md
|
27
|
+
- Rakefile
|
28
|
+
- lib/neon_documents.rb
|
29
|
+
- lib/neon_documents/consumer.rb
|
30
|
+
- lib/neon_documents/document_processor.rb
|
31
|
+
- lib/neon_documents/document_templates/webpage_screenshot.rb
|
32
|
+
- lib/neon_documents/version.rb
|
33
|
+
- neon_documents.gemspec
|
34
|
+
homepage: https://github.com/neonlaw/codebase
|
35
|
+
licenses: []
|
36
|
+
metadata:
|
37
|
+
homepage_uri: https://github.com/neonlaw/codebase
|
38
|
+
source_code_uri: https://github.com/neonlaw/codebase
|
39
|
+
changelog_uri: https://github.com/neonlaw/codebase
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 2.3.0
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
requirements: []
|
55
|
+
rubygems_version: 3.2.15
|
56
|
+
signing_key:
|
57
|
+
specification_version: 4
|
58
|
+
summary: A gem to process files.
|
59
|
+
test_files: []
|