neon_documents 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ac10ee5c8b168a8c0962a03575afa4f2354eb2cc841f177ef406d52b4b13308d
4
+ data.tar.gz: 794195193edefa7b7203a819fe88c0cdad46de2cbab605c26f558df54e4d38ab
5
+ SHA512:
6
+ metadata.gz: 4344aaaa9f767494b1b13b4086e4dfdf70a8639cf01a6ab6e5c6762258ec6049c79160ee828794d7fa598cab1de4b911d2edc57387ade28084f544220894ac83
7
+ data.tar.gz: df0996a61516f0f9a5df3275e410a30e38294f853de80470efecae51d3d898d9b7f10cf4bede3d0e8cd23a25557a6c0cf44144007bbd8cb16fe4168a9399a581
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in neon_documents.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
8
+ gem "standard"
9
+
10
+ gem "sequel", "~> 5.43"
11
+ gem "fixture_dependencies", "~> 1.10"
12
+ gem "pg", "~> 1.2"
13
+
14
+ gem "pry", "~> 0.14.1"
15
+
16
+ gem "faker", "~> 2.17"
17
+
18
+ gem "ruby-kafka", "~> 1.3"
19
+
20
+ gem "sendgrid-ruby", "~> 6.4"
21
+
22
+ gem "neon_postgres", "~> 0.0.1"
data/Gemfile.lock ADDED
@@ -0,0 +1,90 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ neon_documents (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.2)
10
+ coderay (1.1.3)
11
+ concurrent-ruby (1.1.8)
12
+ diff-lcs (1.4.4)
13
+ digest-crc (0.6.3)
14
+ rake (>= 12.0.0, < 14.0.0)
15
+ faker (2.17.0)
16
+ i18n (>= 1.6, < 2)
17
+ fixture_dependencies (1.10.0)
18
+ i18n (1.8.10)
19
+ concurrent-ruby (~> 1.0)
20
+ method_source (1.0.0)
21
+ neon_postgres (0.0.1)
22
+ parallel (1.20.1)
23
+ parser (3.0.1.0)
24
+ ast (~> 2.4.1)
25
+ pg (1.2.3)
26
+ pry (0.14.1)
27
+ coderay (~> 1.1)
28
+ method_source (~> 1.0)
29
+ rainbow (3.0.0)
30
+ rake (12.3.3)
31
+ regexp_parser (2.1.1)
32
+ rexml (3.2.5)
33
+ rspec (3.10.0)
34
+ rspec-core (~> 3.10.0)
35
+ rspec-expectations (~> 3.10.0)
36
+ rspec-mocks (~> 3.10.0)
37
+ rspec-core (3.10.1)
38
+ rspec-support (~> 3.10.0)
39
+ rspec-expectations (3.10.1)
40
+ diff-lcs (>= 1.2.0, < 2.0)
41
+ rspec-support (~> 3.10.0)
42
+ rspec-mocks (3.10.2)
43
+ diff-lcs (>= 1.2.0, < 2.0)
44
+ rspec-support (~> 3.10.0)
45
+ rspec-support (3.10.2)
46
+ rubocop (1.12.1)
47
+ parallel (~> 1.10)
48
+ parser (>= 3.0.0.0)
49
+ rainbow (>= 2.2.2, < 4.0)
50
+ regexp_parser (>= 1.8, < 3.0)
51
+ rexml
52
+ rubocop-ast (>= 1.2.0, < 2.0)
53
+ ruby-progressbar (~> 1.7)
54
+ unicode-display_width (>= 1.4.0, < 3.0)
55
+ rubocop-ast (1.4.1)
56
+ parser (>= 2.7.1.5)
57
+ rubocop-performance (1.10.1)
58
+ rubocop (>= 0.90.0, < 2.0)
59
+ rubocop-ast (>= 0.4.0)
60
+ ruby-kafka (1.3.0)
61
+ digest-crc
62
+ ruby-progressbar (1.11.0)
63
+ ruby_http_client (3.5.2)
64
+ sendgrid-ruby (6.4.0)
65
+ ruby_http_client (~> 3.4)
66
+ sequel (5.43.0)
67
+ standard (1.0.5)
68
+ rubocop (= 1.12.1)
69
+ rubocop-performance (= 1.10.1)
70
+ unicode-display_width (2.0.0)
71
+
72
+ PLATFORMS
73
+ ruby
74
+
75
+ DEPENDENCIES
76
+ faker (~> 2.17)
77
+ fixture_dependencies (~> 1.10)
78
+ neon_documents!
79
+ neon_postgres (~> 0.0.1)
80
+ pg (~> 1.2)
81
+ pry (~> 0.14.1)
82
+ rake (~> 12.0)
83
+ rspec (~> 3.0)
84
+ ruby-kafka (~> 1.3)
85
+ sendgrid-ruby (~> 6.4)
86
+ sequel (~> 5.43)
87
+ standard
88
+
89
+ BUNDLED WITH
90
+ 2.1.4
data/README.md ADDED
@@ -0,0 +1,18 @@
1
+ # Neon Documents
2
+
3
+ This gem is used to listen to document messages in Kafka and then interact with
4
+ the GCP Storage API.
5
+
6
+ ## Necessary API keys and environment variables
7
+
8
+ * Kafka URL - Where to consume document messages
9
+ * Postgres URL - The PostgreSQL URL for connecting
10
+ * GCP Credentials - A GCP Key JSON for a service account with access to the
11
+ UNPROCESSED_DOCUMENTS_BUCKET and the PRIVATE_DOCUMENT_BUCKET
12
+ * UNPROCESSED_DOCUMENTS_BUCKET - the GCP bucket name for unprocessed documents
13
+ * PRIVATE_DOCUMENT_BUCKET - the GCP bucket for processed, private documents
14
+
15
+ ## Contributing
16
+
17
+ Bug reports and pull requests are welcome on GitHub at
18
+ https://github.com/neonlaw/codebase.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
@@ -0,0 +1,10 @@
1
+ require "neon_documents/version"
2
+ require "neon_documents/consumer"
3
+ require "neon_documents/document_processor"
4
# Load every document template that ships with the gem. Only `.rb` entries
# are required, so a stray non-Ruby file or sub-directory inside
# `document_templates/` cannot break loading. The list is sorted so the load
# order does not depend on the order in which the filesystem returns entries.
template_glob = File.join(__dir__, "neon_documents", "document_templates", "*.rb")
Dir[template_glob].sort.each do |file|
  require file
end

# Top-level namespace of the neon_documents gem.
module NeonDocuments
  # Gem-specific error class; a plain StandardError subclass, so a bare
  # `rescue` catches it.
  class Error < StandardError; end
end
@@ -0,0 +1,30 @@
1
+ require "kafka"
2
+
3
module NeonDocuments
  # Consumes document messages from Kafka and hands each one to
  # NeonDocuments::DocumentProcessor.
  class Consumer
    # Connects to the broker named by the KAFKA_URL environment variable
    # (falling back to "kafka:9092"), joins the "documents" consumer group,
    # subscribes to the "process_document" topic and then processes messages
    # until the consumer is stopped.
    #
    # The value of every "process_document" message is passed to the
    # processor as the unprocessed document id. Messages on any other topic
    # are ignored.
    def self.consume_messages
      kafka = Kafka.new([ENV.fetch("KAFKA_URL", "kafka:9092")])

      # Consumers with the same group id will form a Consumer Group together.
      consumer = kafka.consumer(group_id: "documents")

      # It's possible to subscribe to multiple topics by calling `subscribe`
      # repeatedly.
      consumer.subscribe("process_document")

      # Stop the consumer when the SIGTERM signal is sent to the process.
      # It's better to shut down gracefully than to kill the process.
      trap("TERM") { consumer.stop }

      # This will loop indefinitely, yielding each message in turn.
      consumer.each_message do |message|
        case message.topic
        when "process_document"
          # The gem's namespace is NeonDocuments (plural). The singular
          # spelling is not defined anywhere and raised NameError as soon as
          # the first message arrived.
          NeonDocuments::DocumentProcessor.encode_and_store_document(
            unprocessed_document_id: message.value
          )
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require "sendgrid-ruby"
2
+
3
module NeonDocuments
  # Looks up an unprocessed document and its document template, then derives
  # the filename the processed document should get.
  class DocumentProcessor
    # Convenience entry point: builds a processor for the given id and runs it.
    #
    # @param unprocessed_document_id the id used to look up the row in
    #   `unprocessed_documents`
    def self.encode_and_store_document(unprocessed_document_id:)
      new(
        unprocessed_document_id: unprocessed_document_id
      ).encode_and_store_document
    end

    # @param unprocessed_document_id the id used to look up the row in
    #   `unprocessed_documents`
    def initialize(unprocessed_document_id:)
      # Was assigned from a misspelled local (`unprunprocessed_document_id`),
      # which raised NameError on every instantiation.
      @unprocessed_document_id = unprocessed_document_id
    end

    # Computes the processed filename for the document according to its
    # template name and prints it to stdout. Only the "Webpage Screenshot"
    # template is handled; for any other template name `filename` is nil and
    # an empty line is printed.
    def encode_and_store_document
      filename = case document_template.name
      when "Webpage Screenshot"
        NeonDocuments::DocumentTemplates::WebpageScreenshot.processed_filename(
          unprocessed_filename: unprocessed_document.filename
        )
      end

      puts filename
    end

    private

    attr_reader :unprocessed_document_id

    # Memoized database connection obtained from NeonPostgres.
    def connection
      @_connection ||= NeonPostgres::Database.connection
    end

    # Memoized lookup of the unprocessed document by id.
    #
    # NOTE(review): this relies on `connection[:unprocessed_documents]`
    # responding to `find(id: ...)` and returning a record that answers both
    # `filename` and `fetch(:document_template_id)`. If the connection is a
    # plain Sequel database, `find` would be Enumerable#find and would not
    # filter by id -- confirm against NeonPostgres.
    def unprocessed_document
      @_unprocessed_document ||= connection[:unprocessed_documents].find(
        id: unprocessed_document_id
      )
    end

    # Memoized lookup of the template referenced by the unprocessed document.
    #
    # NOTE(review): same `find(id: ...)` assumption as `unprocessed_document`.
    def document_template
      @_document_template ||= connection[:document_templates].find(
        id: unprocessed_document.fetch(:document_template_id)
      )
    end
  end
end
@@ -0,0 +1,11 @@
1
module NeonDocuments
  module DocumentTemplates
    # Filename rules for the "Webpage Screenshot" document template.
    class WebpageScreenshot
      # Derives the processed filename from the unprocessed one.
      #
      # Every "://" becomes "___" and every "." becomes "_"; all other
      # characters (including "/") are kept as they are. The input string is
      # not modified.
      #
      # @param unprocessed_filename [String] the original filename
      # @return [String] the converted filename
      def self.processed_filename(unprocessed_filename:)
        # A plain string pattern matches literally, so no escaped regex is
        # needed for the fixed "://" separator.
        unprocessed_filename.gsub("://", "___").tr(".", "_")
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module NeonDocuments
  # Version of the gem. Frozen so the shared constant cannot be mutated in
  # place by accident.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,25 @@
1
+ require_relative "lib/neon_documents/version"
2
+
3
Gem::Specification.new do |spec|
  spec.name = "neon_documents"
  spec.version = NeonDocuments::VERSION
  spec.authors = ["Neon Law"]
  spec.email = ["support@neonlaw.com"]

  spec.summary = "A gem to process files."
  # Mirrors the README; the previous text described the neon_postgres gem.
  spec.description = "Listens to document messages in Kafka and interacts " \
    "with the GCP Storage API."
  spec.homepage = "https://github.com/neonlaw/codebase"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.require_paths = ["lib"]

  # Runtime dependencies. The library requires "kafka" and "sendgrid-ruby"
  # when it is loaded and references NeonPostgres::Database, so these must be
  # installed together with the gem rather than only listed in the Gemfile.
  # Version constraints match the Gemfile.
  spec.add_dependency "ruby-kafka", "~> 1.3"
  spec.add_dependency "sendgrid-ruby", "~> 6.4"
  spec.add_dependency "neon_postgres", "~> 0.0.1"
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: neon_documents
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Neon Law
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |-
14
+ This gem contains tests and sequel setup to speak to
15
+ the Neon Law database
16
+ email:
17
+ - support@neonlaw.com
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - ".gitignore"
23
+ - ".rspec"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - README.md
27
+ - Rakefile
28
+ - lib/neon_documents.rb
29
+ - lib/neon_documents/consumer.rb
30
+ - lib/neon_documents/document_processor.rb
31
+ - lib/neon_documents/document_templates/webpage_screenshot.rb
32
+ - lib/neon_documents/version.rb
33
+ - neon_documents.gemspec
34
+ homepage: https://github.com/neonlaw/codebase
35
+ licenses: []
36
+ metadata:
37
+ homepage_uri: https://github.com/neonlaw/codebase
38
+ source_code_uri: https://github.com/neonlaw/codebase
39
+ changelog_uri: https://github.com/neonlaw/codebase
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: 2.3.0
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubygems_version: 3.2.15
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: A gem to process files.
59
+ test_files: []