kishu 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +36 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +222 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +20 -0
- data/bin/kishu +5 -0
- data/kishu.gemspec +54 -0
- data/lib/kishu.rb +30 -0
- data/lib/kishu/base.rb +14 -0
- data/lib/kishu/cli.rb +42 -0
- data/lib/kishu/client.rb +89 -0
- data/lib/kishu/lagotto_job.rb +22 -0
- data/lib/kishu/log.rb +33 -0
- data/lib/kishu/merger.rb +69 -0
- data/lib/kishu/pipeline.rb +29 -0
- data/lib/kishu/report.rb +149 -0
- data/lib/kishu/resolution_event.rb +83 -0
- data/lib/kishu/s3.rb +24 -0
- data/lib/kishu/sushi.rb +59 -0
- data/lib/kishu/usage_event.rb +124 -0
- data/lib/kishu/utils.rb +115 -0
- data/lib/kishu/version.rb +3 -0
- data/spec/factories/default.rb +71 -0
- data/spec/fixtures/vcr_cassettes/Kishu_Sushi/wrap_event/when_doi_doesn_t_exist/should_fail.yml +3867 -0
- data/spec/kishu_spec.rb +9 -0
- data/spec/report_spec.rb +79 -0
- data/spec/resolution_event_spec.rb +80 -0
- data/spec/spec_helper.rb +93 -0
- metadata +400 -0
data/bin/kishu
ADDED
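The five added lines of this executable are not reproduced in this extract. As a rough sketch only (not the published file), a Thor-based launcher for the Kishu::CLI class defined in data/lib/kishu/cli.rb below would typically look like:

    #!/usr/bin/env ruby
    # hypothetical launcher; the actual bin/kishu contents are not shown in this diff
    require "kishu"

    Kishu::CLI.start(ARGV)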
data/kishu.gemspec
ADDED
@@ -0,0 +1,54 @@
+require "date"
+require File.expand_path("../lib/kishu/version", __FILE__)
+
+
+Gem::Specification.new do |spec|
+  spec.name = "kishu"
+  spec.version = Kishu::VERSION
+  spec.authors = ["Kristian Garza"]
+  spec.email = ["kgarza@datacite.org"]
+
+  spec.summary = "Client for DOI Resolution Logs processing pipeline"
+  spec.description = "This client helps you to prepare logs to be consumed for the pipeline as well as for creating DOI resolution reports using the COUNTER CoP "
+  spec.homepage = "https://github.com/datacite/kishu"
+  spec.license = "MIT"
+
+  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
+  # to allow pushing to a single host or delete this section to allow pushing to any host.
+  # if spec.respond_to?(:metadata)
+  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
+  # else
+  #   raise "RubyGems 2.0 or newer is required to protect against " \
+  #     "public gem pushes."
+  # end
+
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+
+  # spec.add_dependency 'maremma', '>= 4.1', '< 5'
+  spec.add_dependency 'faraday', "~>0.15.3"
+  spec.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
+  spec.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
+  spec.add_dependency 'thor', '~> 0.19'
+  spec.add_dependency 'maremma', '>= 4.1', '< 5'
+  spec.add_dependency 'faraday_middleware-aws-sigv4', '~> 0.2.4'
+  spec.add_development_dependency "bundler", "~> 1.16"
+  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency "rspec", "~> 3.0"
+  spec.add_development_dependency 'elasticsearch', '~> 6.1.0'
+  spec.add_development_dependency "thor", '~> 0.19'
+  spec.add_development_dependency "faraday", "~>0.15.3"
+  spec.add_development_dependency 'rack-test', '~> 0'
+  spec.add_development_dependency 'vcr', '~> 3.0', '>= 3.0.3'
+  spec.add_development_dependency 'webmock', '~> 3.0', '>= 3.0.1'
+  spec.add_development_dependency 'simplecov', '~> 0.14.1'
+  spec.add_development_dependency 'factory_bot', '~> 4.0'
+  spec.add_dependency 'sucker_punch', '~> 2.0'
+  spec.add_dependency 'bolognese', '~> 0.9', '>= 0.10'
+  spec.add_dependency 'elasticsearch', '~> 6.1.0'
+
+  spec.files = `git ls-files`.split($/)
+  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
+  spec.executables = ["kishu"]
+  spec.require_paths = ["lib"]
+end
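Because the spec declares spec.executables = ["kishu"] and spec.require_paths = ["lib"], installing the gem exposes a kishu command and lets require "kishu" load the library. A minimal Gemfile sketch for consuming it (pulling from the repository named in spec.homepage is an assumption, not something the spec mandates):

    # Gemfile (sketch)
    source "https://rubygems.org"

    gem "kishu", git: "https://github.com/datacite/kishu"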
data/lib/kishu.rb
ADDED
@@ -0,0 +1,30 @@
+require "kishu/resolution_event"
+require "kishu/usage_event"
+require "kishu/report"
+require "kishu/cli"
+require "kishu/sushi"
+require "kishu/version"
+require "kishu/client"
+require "kishu/log"
+require "kishu/pipeline"
+require "kishu/lagotto_job"
+
+
+API_URL = ENV['API_URL'] ? ENV['API_URL'] : "https://api.datacite.org"
+HUB_URL = ENV['HUB_URL'] ? ENV['HUB_URL'] : "https://api.test.datacite.org"
+HUB_TOKEN = ENV['HUB_TOKEN'] ? ENV['HUB_TOKEN'] : ""
+ES_HOST = ENV['ES_HOST'] ? ENV['ES_HOST'] : "localhost:9200"
+ES_INDEX = ENV['ES_INDEX'] ? ENV['ES_INDEX'] : "resolutions"
+LOGSTASH_HOST = ENV['LOGSTASH_HOST'] ? ENV['LOGSTASH_HOST'] : "localhost:9600"
+LAGOTTINO_URL = ENV['LAGOTTINO_URL'] ? ENV['LAGOTTINO_URL'] : "https://api.test.datacite.org"
+LAGOTTINO_TOKEN = ENV['LAGOTTINO_TOKEN'] ? ENV['LAGOTTINO_TOKEN'] : ""
+LICENSE = ENV['LICENSE'] ? ENV['LICENSE'] : "https://creativecommons.org/publicdomain/zero/1.0/"
+SOURCE_TOKEN = ENV['SOURCE_TOKEN'] ? ENV['SOURCE_TOKEN'] : "65903a54-01c8-4a3f-9bf2-04ecc658247a"
+S3_MERGED_LOGS_BUCKET = ENV['S3_MERGED_LOGS_BUCKET'] ? ENV['S3_MERGED_LOGS_BUCKET'] : "./monthly_logs"
+S3_RESOLUTION_LOGS_BUCKET = ENV['S3_RESOLUTION_LOGS_BUCKET'] ? ENV['S3_RESOLUTION_LOGS_BUCKET'] : "./"
+AWS_REGION = ENV['AWS_REGION'] ? ENV['AWS_REGION'] : ""
+AWS_ACCESS_KEY_ID = ENV['AWS_ACCESS_KEY_ID'] ? ENV['AWS_ACCESS_KEY_ID'] : ""
+AWS_SECRET_ACCESS_KEY = ENV['AWS_SECRET_ACCESS_KEY'] ? ENV['AWS_SECRET_ACCESS_KEY'] : ""
+ELASTIC_PASSWORD = ENV['ELASTIC_PASSWORD'] ? ENV['ELASTIC_PASSWORD'] : ""
+LOGS_TAG = "[Resolution Logs]"
+puts ENV.to_a
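Every constant above falls back to a default when its environment variable is unset, so the library is configured entirely through ENV at load time. A minimal sketch (host and token values are illustrative):

    # set before `require "kishu"`, because the constants are evaluated when the file loads
    ENV["ES_HOST"]   = "es.internal.example:9200"   # hypothetical Elasticsearch host
    ENV["ES_INDEX"]  = "resolutions"
    ENV["HUB_TOKEN"] = "..."                        # token for the usage hub at HUB_URL

    require "kishu"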
data/lib/kishu/base.rb
ADDED
@@ -0,0 +1,14 @@
+require 'elasticsearch'
+require 'json'
+require 'faraday'
+
+
+
+module Kishu
+  module Base
+    ES_HOST = ENV['ES_HOST'] ? ENV['ES_HOST'] : "localhost:9200"
+
+    # __elasticsearch__ = Faraday.new(url: ES_HOST)
+    __elasticsearch__ = Elasticsearch::Client.new host: ES_HOST, transport_options: { request: { timeout: 3600, open_timeout: 3600 } }
+  end
+end
data/lib/kishu/cli.rb
ADDED
@@ -0,0 +1,42 @@
+require 'thor'
+
+
+require_relative 'sushi'
+require_relative 'log'
+
+
+module Kishu
+  class CLI < Thor
+
+    include Kishu::Base
+    include Kishu::Utils
+    # include Kishu::Report
+    include Kishu::Merger
+    # include Kishu::Event
+
+    # load ENV variables from .env file if it exists
+    env_file = File.expand_path("../../.env", __FILE__)
+    if File.exist?(env_file)
+      require 'dotenv'
+      Dotenv.overload env_file
+    end
+
+    def self.exit_on_failure?
+      true
+    end
+
+    # from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
+    map %w[--version -v] => :__print_version
+
+    desc "--version, -v", "print the version"
+    def __print_version
+      puts Kishu::VERSION
+    end
+
+    desc "sushi SUBCOMMAND", "sushi commands"
+    subcommand "sushi", Kishu::Sushi
+
+    desc "log SUBCOMMAND", "log commands"
+    subcommand "log", Kishu::Log
+  end
+end
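With the --version mapping and the two subcommand registrations above, the CLI can also be exercised directly through Thor's start method, e.g.:

    require "kishu"

    Kishu::CLI.start(["--version"])    # prints Kishu::VERSION
    Kishu::CLI.start(%w[log create])   # dispatches to Kishu::Log#create (see data/lib/kishu/log.rb)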
data/lib/kishu/client.rb
ADDED
@@ -0,0 +1,89 @@
+require 'faraday_middleware'
+require 'faraday_middleware/aws_sigv4'
+require 'logger'
+
+require_relative 'utils'
+require_relative 'base'
+
+module Kishu
+  class Client
+
+    def initialize
+
+      if ES_HOST == "localhost:9200" || ES_HOST == "elasticsearch:9200"
+        @client = Elasticsearch::Client.new(host: ES_HOST, user: "elastic", password: ELASTIC_PASSWORD, transport_options: { request: { timeout: 3600, open_timeout: 3600 }}) do |f|
+          f.adapter Faraday.default_adapter
+        end
+      else
+        @client = Elasticsearch::Client.new(host: ES_HOST, port: '80', scheme: 'http') do |f|
+          f.request :aws_sigv4,
+            service: 'es',
+            region: AWS_REGION,
+            access_key_id: AWS_ACCESS_KEY_ID,
+            secret_access_key: AWS_SECRET_ACCESS_KEY
+          f.adapter Faraday.default_adapter
+        end
+      end
+      @client
+    end
+
+
+    def get options={}
+
+      x =@client.search(body:{
+        size: options[:size] ||= 0,
+        query: {
+          query_string: {
+            query: "*"
+          }
+        },
+        aggregations: aggregations(options)
+      },
+      index: ES_INDEX
+      )
+      x
+    end
+
+    def is_empty?
+      return true unless get
+      nil
+    end
+
+    def clear_index
+      @client.indices.delete index: ES_INDEX
+      puts "Resolutions index has been deleted"
+    end
+
+
+    def get_logdate options={}
+      @client.search(body:{
+        size: 1,
+        query: {
+          query_string: {
+            query: "*"
+          }
+        },
+        aggregations: aggregations(options)
+      },
+      index: "resolutions"
+      ).dig("hits","hits",0,"_source","logdate")
+    end
+
+    def aggregations options={}
+      {
+        doi: {composite: {
+            sources: [{doi: {terms: {field: :doi }}}],
+            after: { doi: options.fetch(:after_key,"")},
+            size: options[:aggs_size]
+          },
+          aggs: {
+            unique: {terms: {field: "unique_usage"}},
+            totale: {terms: {field: "total_usage" }}
+          }
+        }
+      }
+    end
+
+  end
+end
+
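get wraps an Elasticsearch composite aggregation keyed on doi, so callers page through buckets by feeding the returned after_key back into the next call. A minimal pagination sketch, assuming a populated resolutions index is reachable at ES_HOST:

    require "kishu"

    client = Kishu::Client.new
    after  = ""
    loop do
      page = client.get(aggs_size: 500, after_key: after)
      page.dig("aggregations", "doi", "buckets").each do |bucket|
        puts "#{bucket.dig('key', 'doi')}: #{bucket['doc_count']} resolutions"
      end
      after = page.dig("aggregations", "doi", "after_key", "doi")   # cursor for the next page
      break if after.nil?
    end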
data/lib/kishu/lagotto_job.rb
ADDED
@@ -0,0 +1,22 @@
+require_relative 'resolution_event'
+
+
+class LagottoJob
+  include SuckerPunch::Job
+  include Kishu::Utils
+  workers 4
+
+
+
+  def perform(report, options={})
+    # data = format_instance event, options
+
+    # push_url = LAGOTTINO_URL + "/events"
+    # response = Maremma.post(push_url, data: data.to_json,
+    #   bearer: LAGOTTINO_TOKEN,
+    #   content_type: 'application/vnd.api+json')
+    # puts data
+    # puts response.status
+    Report.send_report report
+  end
+end
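Most of the original Lagotto/Event Data push is commented out, leaving the job as a thin asynchronous wrapper around the report hand-off. Since it includes SuckerPunch::Job with workers 4, it is enqueued with SuckerPunch's standard API, roughly:

    # `report` would be a report hash such as the one built by Report#get_template
    LagottoJob.perform_async(report)   # runs #perform on one of four background threads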
data/lib/kishu/log.rb
ADDED
@@ -0,0 +1,33 @@
+
+require 'thor'
+
+require_relative 'merger'
+require_relative 'utils'
+require_relative 'base'
+
+module Kishu
+  class Log < Thor
+
+    include Kishu::Base
+    include Kishu::Merger
+    include Kishu::Utils
+
+
+    desc "create logs", "create logs"
+    method_option :logs_bucket, :default => ENV['S3_RESOLUTION_LOGS_BUCKET']
+    method_option :output_bucket, :default => ENV['S3_MERGED_LOGS_BUCKET']
+    method_option :month_year, :type => :string, :default => "201804"
+
+    def create
+      return "Logs don't exist" unless File.directory?(options[:month_year])
+      return "Pipeline has events" unless Pipeline.new.is_empty?
+      @log_date = get_date options[:month_year]
+      @folder = options[:month_year]
+      puts @log_date
+      uncompress_files
+      # add_bookends
+      merge_files
+      sort_files
+    end
+  end
+end
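create refuses to run unless a local directory named after month_year exists and the pipeline reports no events, then uncompresses, merges, and sorts that month's logs via the Merger mixin. From the command line this corresponds to something like:

    # shell equivalent: kishu log create --month_year 201805
    # expects a ./201805 directory of DataCite-access.log-* files
    Kishu::CLI.start(%w[log create --month_year 201805])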
data/lib/kishu/merger.rb
ADDED
@@ -0,0 +1,69 @@
+
+require 'date'
+
+module Kishu
+  module Merger
+
+    FILE_STEM = "DataCite-access.log"
+
+    def get_date filename
+      Date.parse("#{filename}01")
+    end
+
+
+    def uncompress_files
+      system("gunzip #{resolution_logs_folder}/#{FILE_STEM}-*")
+    end
+
+
+    # def add_bookends
+    #   File.delete("#{resolution_logs_folder}/#{FILE_STEM}-1-begin.log") if File.exist?("#{resolution_logs_folder}/#{FILE_STEM}-1-begin.log")
+    #   File.delete("#{resolution_logs_folder}/#{FILE_STEM}-9-eof.log") if File.exist?("#{resolution_logs_folder}/#{FILE_STEM}-9-eof.log")
+
+    #   begin_date = Date.civil(@log_date.year,@log_date.month,1).strftime("%Y-%m-%d")
+    #   end_date = Date.civil(@log_date.year,@log_date.month+1, 1).strftime("%Y-%m-%d")
+
+    #   begin_line = '0.0.0.0 HTTP:HDL "'+begin_date+' 00:00:00.000Z" 1 1 22ms 10.5281/zenodo.1043571 "300:10.admin/codata" "" "Mozilla"'+"\n"
+    #   puts begin_line
+
+    #   end_line = '0.0.0.0 HTTP:HDL "'+end_date+' 00:01:00.000Z" 1 1 22ms 10.5281/zenodo.1043571 "300:10.admin/codata" "" "Mozilla"'+"\n"
+    #   puts end_line
+
+    #   File.open("#{resolution_logs_folder}/#{FILE_STEM}-1-begin.log","w") {|f| f.write(begin_line) }
+    #   File.open("#{resolution_logs_folder}/#{FILE_STEM}-9-eof.log","w") {|f| f.write(end_line) }
+    # end
+
+    def merged_file
+      "#{merged_logs_folder}/datacite_resolution_logs_#{@log_date}.log"
+    end
+
+    def sorted_file
+      "#{resolution_logs_folder}/datacite_resolution_logs_#{@log_date}_sorted.log"
+    end
+
+    def resolution_logs_folder
+      bucket = S3_RESOLUTION_LOGS_BUCKET
+      "#{bucket}#{@folder}"
+    end
+
+    def merged_logs_folder
+      bucket = S3_MERGED_LOGS_BUCKET
+      "#{bucket}#{@folder}"
+    end
+
+    def merge_files
+      File.delete(merged_file) if File.exist?(merged_file)
+
+      system("cat #{resolution_logs_folder}/#{FILE_STEM}-* > #{merged_file}")
+      puts "Merged Completed"
+    end
+
+    def sort_files
+      File.delete(sorted_file) if File.exist?(sorted_file)
+
+      system("sort -k3 #{merged_file} > #{sorted_file}")
+      puts "Sorted Completed"
+      puts sorted_file
+    end
+  end
+end
data/lib/kishu/pipeline.rb
ADDED
@@ -0,0 +1,29 @@
+require 'faraday'
+require 'logger'
+
+require_relative 'utils'
+require_relative 'base'
+
+module Kishu
+  class Pipeline
+
+    def initialize
+      @conn = Faraday.new(:url => LOGSTASH_HOST)
+      # logger = Logger.new(STDOUT)
+      # logger.info
+    end
+
+    def is_ready?
+      main = @conn.get do |req|
+        req.url '/_node/stats/pipelines/main'
+      end
+      return nil unless main.dig("pipelines","main","events","out") == 0
+    end
+
+    def is_running?
+
+    end
+
+  end
+end
+
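is_ready? asks the Logstash node stats API whether the main pipeline has emitted any events yet. The HTTP request it issues is roughly equivalent to:

    require "faraday"
    require "json"

    conn  = Faraday.new(url: "http://localhost:9600")   # LOGSTASH_HOST default, scheme added
    stats = JSON.parse(conn.get("/_node/stats/pipelines/main").body)
    stats.dig("pipelines", "main", "events", "out")      # 0 while nothing has left the pipeline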
data/lib/kishu/report.rb
ADDED
@@ -0,0 +1,149 @@
+require 'json'
+require 'date'
+require "faraday"
+require 'securerandom'
+require 'zlib'
+require 'digest'
+
+require_relative 'resolution_event'
+require_relative 'client'
+
+module Kishu
+  class Report
+
+    include Kishu::Base
+    include Kishu::Utils
+
+    def initialize options={}
+      set_period
+      @es_client = Client.new()
+      @logger = Logger.new(STDOUT)
+      @report_id = options[:report_id] ? options[:report_id] : ""
+      @total = 0
+      @aggs_size = options[:aggs_size]
+      @chunk_size = options[:chunk_size]
+      @after = options[:after_key] ||=""
+    end
+
+    def report_period options={}
+      es_client = Client.new()
+
+      logdate = es_client.get_logdate({aggs_size: 1})
+      puts logdate
+      Date.parse(logdate)
+    end
+
+
+    def get_events options={}
+      logger = Logger.new(STDOUT)
+      es_client = Client.new()
+      response = es_client.get({aggs_size: @aggs_size || 500, after_key: options[:after_key] ||=""})
+      aggs = response.dig("aggregations","doi","buckets")
+      x = aggs.map do |agg|
+        ResolutionEvent.new(agg,{period: @period, report_id: @report_id}).wrap_event
+      end
+      after = response.dig("aggregations","doi").fetch("after_key",{"doi"=>nil}).dig("doi")
+      logger.info "After_key for pagination #{after}"
+      y = {data: x, after: after}
+      y
+    end
+
+
+    def generate_dataset_array
+      @datasets = []
+      loop do
+        response = get_events({after_key: @after ||=""})
+        @datasets = @datasets.concat response[:data]
+        @after = response[:after]
+        @total += @datasets.size
+        generate_chunk_report if @datasets.size > @chunk_size
+        break if @after.nil?
+      end
+    end
+
+    def compress report
+      # report = File.read(hash)
+      gzip = Zlib::GzipWriter.new(StringIO.new)
+      string = report.to_json
+      gzip << string
+      body = gzip.close.string
+      body
+    end
+
+
+    def generate_chunk_report
+      # puts get_template
+      # LagottoJob.perform_async(get_template(@datasets))
+      file = merged_file #+ 'after_key_' + @after
+      File.open(file,"w") do |f|
+        f.write(JSON.pretty_generate get_template)
+      end
+      send_report get_template
+      @datasets = []
+    end
+
+    def make_report options={}
+      generate_dataset_array
+      @logger.info "#{LOGS_TAG} Month of #{@period.dig("begin-date")} sent to Hub in report #{@uid} with stats for #{@total} datasets"
+    end
+
+
+    def set_period
+      report_period
+      @period = {
+        "begin-date": Date.civil(report_period.year, report_period.mon, 1).strftime("%Y-%m-%d"),
+        "end-date": Date.civil(report_period.year, report_period.mon, -1).strftime("%Y-%m-%d"),
+      }
+    end
+
+    def send_report report, options={}
+      uri = HUB_URL+'/reports'
+      puts uri
+
+      headers = {
+        content_type: "application/gzip",
+        content_encoding: 'gzip',
+        accept: 'gzip'
+      }
+
+      body = compress(report)
+      n = 0
+      loop do
+        request = Maremma.post(uri, data: body,
+          bearer: ENV['HUB_TOKEN'],
+          headers: headers,
+          timeout: 100)
+
+        @uid = request.body.dig("data","report","id")
+        @logger.info "#{LOGS_TAG} Hub response #{request.status} for Report finishing in #{@after}"
+        @logger.info "#{LOGS_TAG} Hub response #{@uid} for Report finishing in #{@after}"
+        n += 1
+        break if request.status == 201
+        fail "#{LOGS_TAG} Too many attempts were tried to push this report" if n > 1
+        sleep 1
+      end
+    end
+
+    def get_template
+      {
+        "report-header": get_header,
+        "report-datasets": @datasets
+      }
+    end
+
+    def get_header
+      {
+        "report-name": "resolution report",
+        "report-id": "dsr",
+        release: "drl",
+        created: Date.today.strftime("%Y-%m-%d"),
+        "created-by": "datacite",
+        "reporting-period": @period,
+        "report-filters": [],
+        "report-attributes": [],
+        exceptions: [{code: 69,severity: "warning", message: "Report is compressed using gzip","help-url": "https://github.com/datacite/sashimi",data: "usage data needs to be uncompressed"}]
+      }
+    end
+
+  end
+end
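Report pages through the resolution aggregations via Client#get, wraps each bucket as a report-datasets entry with ResolutionEvent#wrap_event, gzips the COUNTER-style JSON, and POSTs each chunk to HUB_URL/reports with Maremma, giving up after a second failed attempt. A minimal driving sketch, assuming the ENV configuration above points at a populated resolutions index and a reachable hub (sizes and report_id are illustrative):

    require "kishu"

    report = Kishu::Report.new(report_id: "2018-05-dsr", aggs_size: 500, chunk_size: 40_000)
    report.make_report   # builds chunks and gzip-POSTs each one to "#{HUB_URL}/reports"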