kishu 0.0.0
- checksums.yaml +7 -0
- data/.gitignore +36 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +222 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +20 -0
- data/bin/kishu +5 -0
- data/kishu.gemspec +54 -0
- data/lib/kishu.rb +30 -0
- data/lib/kishu/base.rb +14 -0
- data/lib/kishu/cli.rb +42 -0
- data/lib/kishu/client.rb +89 -0
- data/lib/kishu/lagotto_job.rb +22 -0
- data/lib/kishu/log.rb +33 -0
- data/lib/kishu/merger.rb +69 -0
- data/lib/kishu/pipeline.rb +29 -0
- data/lib/kishu/report.rb +149 -0
- data/lib/kishu/resolution_event.rb +83 -0
- data/lib/kishu/s3.rb +24 -0
- data/lib/kishu/sushi.rb +59 -0
- data/lib/kishu/usage_event.rb +124 -0
- data/lib/kishu/utils.rb +115 -0
- data/lib/kishu/version.rb +3 -0
- data/spec/factories/default.rb +71 -0
- data/spec/fixtures/vcr_cassettes/Kishu_Sushi/wrap_event/when_doi_doesn_t_exist/should_fail.yml +3867 -0
- data/spec/kishu_spec.rb +9 -0
- data/spec/report_spec.rb +79 -0
- data/spec/resolution_event_spec.rb +80 -0
- data/spec/spec_helper.rb +93 -0
- metadata +400 -0
data/bin/kishu
ADDED
data/kishu.gemspec
ADDED
@@ -0,0 +1,54 @@
require "date"
require File.expand_path("../lib/kishu/version", __FILE__)


Gem::Specification.new do |spec|
  spec.name          = "kishu"
  spec.version       = Kishu::VERSION
  spec.authors       = ["Kristian Garza"]
  spec.email         = ["kgarza@datacite.org"]

  spec.summary       = "Client for DOI Resolution Logs processing pipeline"
  spec.description   = "This client helps you to prepare logs to be consumed for the pipeline as well as for creating DOI resolution reports using the COUNTER CoP"
  spec.homepage      = "https://github.com/datacite/kishu"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.

  # spec.add_dependency 'maremma', '>= 4.1', '< 5'
  spec.add_dependency 'faraday', "~>0.15.3"
  spec.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
  spec.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
  spec.add_dependency 'thor', '~> 0.19'
  spec.add_dependency 'maremma', '>= 4.1', '< 5'
  spec.add_dependency 'faraday_middleware-aws-sigv4', '~> 0.2.4'
  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency 'elasticsearch', '~> 6.1.0'
  spec.add_development_dependency "thor", '~> 0.19'
  spec.add_development_dependency "faraday", "~>0.15.3"
  spec.add_development_dependency 'rack-test', '~> 0'
  spec.add_development_dependency 'vcr', '~> 3.0', '>= 3.0.3'
  spec.add_development_dependency 'webmock', '~> 3.0', '>= 3.0.1'
  spec.add_development_dependency 'simplecov', '~> 0.14.1'
  spec.add_development_dependency 'factory_bot', '~> 4.0'
  spec.add_dependency 'sucker_punch', '~> 2.0'
  spec.add_dependency 'bolognese', '~> 0.9', '>= 0.10'
  spec.add_dependency 'elasticsearch', '~> 6.1.0'

  spec.files = `git ls-files`.split($/)
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = ["kishu"]
  spec.require_paths = ["lib"]
end
data/lib/kishu.rb
ADDED
@@ -0,0 +1,30 @@
require "kishu/resolution_event"
require "kishu/usage_event"
require "kishu/report"
require "kishu/cli"
require "kishu/sushi"
require "kishu/version"
require "kishu/client"
require "kishu/log"
require "kishu/pipeline"
require "kishu/lagotto_job"


API_URL = ENV['API_URL'] ? ENV['API_URL'] : "https://api.datacite.org"
HUB_URL = ENV['HUB_URL'] ? ENV['HUB_URL'] : "https://api.test.datacite.org"
HUB_TOKEN = ENV['HUB_TOKEN'] ? ENV['HUB_TOKEN'] : ""
ES_HOST = ENV['ES_HOST'] ? ENV['ES_HOST'] : "localhost:9200"
ES_INDEX = ENV['ES_INDEX'] ? ENV['ES_INDEX'] : "resolutions"
LOGSTASH_HOST = ENV['LOGSTASH_HOST'] ? ENV['LOGSTASH_HOST'] : "localhost:9600"
LAGOTTINO_URL = ENV['LAGOTTINO_URL'] ? ENV['LAGOTTINO_URL'] : "https://api.test.datacite.org"
LAGOTTINO_TOKEN = ENV['LAGOTTINO_TOKEN'] ? ENV['LAGOTTINO_TOKEN'] : ""
LICENSE = ENV['LICENSE'] ? ENV['LICENSE'] : "https://creativecommons.org/publicdomain/zero/1.0/"
SOURCE_TOKEN = ENV['SOURCE_TOKEN'] ? ENV['SOURCE_TOKEN'] : "65903a54-01c8-4a3f-9bf2-04ecc658247a"
S3_MERGED_LOGS_BUCKET = ENV['S3_MERGED_LOGS_BUCKET'] ? ENV['S3_MERGED_LOGS_BUCKET'] : "./monthly_logs"
S3_RESOLUTION_LOGS_BUCKET = ENV['S3_RESOLUTION_LOGS_BUCKET'] ? ENV['S3_RESOLUTION_LOGS_BUCKET'] : "./"
AWS_REGION = ENV['AWS_REGION'] ? ENV['AWS_REGION'] : ""
AWS_ACCESS_KEY_ID = ENV['AWS_ACCESS_KEY_ID'] ? ENV['AWS_ACCESS_KEY_ID'] : ""
AWS_SECRET_ACCESS_KEY = ENV['AWS_SECRET_ACCESS_KEY'] ? ENV['AWS_SECRET_ACCESS_KEY'] : ""
ELASTIC_PASSWORD = ENV['ELASTIC_PASSWORD'] ? ENV['ELASTIC_PASSWORD'] : ""
LOGS_TAG = "[Resolution Logs]"
puts ENV.to_a
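Note: every constant above is resolved from environment variables at require time. A minimal configuration sketch, assuming you set the variables before requiring the gem (the host and token values are placeholders, not defaults shipped with kishu; alternatively put them in the .env file the CLI loads via Dotenv):

# Placeholder values for illustration only -- substitute your own.
ENV['ES_HOST']   ||= "search.example.org:9200"
ENV['ES_INDEX']  ||= "resolutions"
ENV['HUB_URL']   ||= "https://api.test.datacite.org"
ENV['HUB_TOKEN'] ||= "my-hub-token"

require "kishu"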
data/lib/kishu/base.rb
ADDED
@@ -0,0 +1,14 @@
require 'elasticsearch'
require 'json'
require 'faraday'


module Kishu
  module Base
    ES_HOST = ENV['ES_HOST'] ? ENV['ES_HOST'] : "localhost:9200"

    # __elasticsearch__ = Faraday.new(url: ES_HOST)
    __elasticsearch__ = Elasticsearch::Client.new host: ES_HOST, transport_options: { request: { timeout: 3600, open_timeout: 3600 } }
  end
end
data/lib/kishu/cli.rb
ADDED
@@ -0,0 +1,42 @@
require 'thor'

require_relative 'sushi'
require_relative 'log'


module Kishu
  class CLI < Thor

    include Kishu::Base
    include Kishu::Utils
    # include Kishu::Report
    include Kishu::Merger
    # include Kishu::Event

    # load ENV variables from .env file if it exists
    env_file = File.expand_path("../../.env", __FILE__)
    if File.exist?(env_file)
      require 'dotenv'
      Dotenv.overload env_file
    end

    def self.exit_on_failure?
      true
    end

    # from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
    map %w[--version -v] => :__print_version

    desc "--version, -v", "print the version"
    def __print_version
      puts Kishu::VERSION
    end

    desc "sushi SUBCOMMAND", "sushi commands"
    subcommand "sushi", Kishu::Sushi

    desc "log SUBCOMMAND", "log commands"
    subcommand "log", Kishu::Log
  end
end
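Note: the body of data/bin/kishu is not shown in this diff. A typical Thor entry point would simply hand ARGV to the class above; the sketch below is an assumption about the executable, not its verbatim source:

#!/usr/bin/env ruby
require "kishu"

# e.g. `kishu --version` or `kishu log create --month_year 201804`
Kishu::CLI.start(ARGV)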
data/lib/kishu/client.rb
ADDED
@@ -0,0 +1,89 @@
require 'faraday_middleware'
require 'faraday_middleware/aws_sigv4'
require 'logger'

require_relative 'utils'
require_relative 'base'

module Kishu
  class Client

    def initialize
      if ES_HOST == "localhost:9200" || ES_HOST == "elasticsearch:9200"
        @client = Elasticsearch::Client.new(host: ES_HOST, user: "elastic", password: ELASTIC_PASSWORD, transport_options: { request: { timeout: 3600, open_timeout: 3600 }}) do |f|
          f.adapter Faraday.default_adapter
        end
      else
        @client = Elasticsearch::Client.new(host: ES_HOST, port: '80', scheme: 'http') do |f|
          f.request :aws_sigv4,
            service: 'es',
            region: AWS_REGION,
            access_key_id: AWS_ACCESS_KEY_ID,
            secret_access_key: AWS_SECRET_ACCESS_KEY
          f.adapter Faraday.default_adapter
        end
      end
      @client
    end

    def get options={}
      x = @client.search(body: {
          size: options[:size] ||= 0,
          query: {
            query_string: {
              query: "*"
            }
          },
          aggregations: aggregations(options)
        },
        index: ES_INDEX
      )
      x
    end

    def is_empty?
      return true unless get
      nil
    end

    def clear_index
      @client.indices.delete index: ES_INDEX
      puts "Resolutions index has been deleted"
    end

    def get_logdate options={}
      @client.search(body: {
          size: 1,
          query: {
            query_string: {
              query: "*"
            }
          },
          aggregations: aggregations(options)
        },
        index: "resolutions"
      ).dig("hits","hits",0,"_source","logdate")
    end

    def aggregations options={}
      {
        doi: {
          composite: {
            sources: [{doi: {terms: {field: :doi }}}],
            after: { doi: options.fetch(:after_key,"") },
            size: options[:aggs_size]
          },
          aggs: {
            unique: {terms: {field: "unique_usage"}},
            totale: {terms: {field: "total_usage" }}
          }
        }
      }
    end

  end
end
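Note: the composite aggregation above is keyed on doi and paginated with after_key. A minimal sketch of walking every DOI bucket with this client, mirroring how Report#get_events consumes it below; the loop itself and the aggs_size value are illustrative:

client = Kishu::Client.new
after  = ""
loop do
  response = client.get(aggs_size: 500, after_key: after)
  buckets  = response.dig("aggregations", "doi", "buckets")
  buckets.each { |bucket| puts bucket.dig("key", "doi") }
  # after_key is absent on the last page, which ends the loop
  after = response.dig("aggregations", "doi").fetch("after_key", {"doi" => nil}).dig("doi")
  break if after.nil?
end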
data/lib/kishu/lagotto_job.rb
ADDED
@@ -0,0 +1,22 @@
require_relative 'resolution_event'


class LagottoJob
  include SuckerPunch::Job
  include Kishu::Utils
  workers 4


  def perform(report, options={})
    # data = format_instance event, options

    # push_url = LAGOTTINO_URL + "/events"
    # response = Maremma.post(push_url, data: data.to_json,
    #   bearer: LAGOTTINO_TOKEN,
    #   content_type: 'application/vnd.api+json')
    # puts data
    # puts response.status
    Report.send_report report
  end
end
data/lib/kishu/log.rb
ADDED
@@ -0,0 +1,33 @@
require 'thor'

require_relative 'merger'
require_relative 'utils'
require_relative 'base'

module Kishu
  class Log < Thor

    include Kishu::Base
    include Kishu::Merger
    include Kishu::Utils

    desc "create logs", "create logs"
    method_option :logs_bucket, :default => ENV['S3_RESOLUTION_LOGS_BUCKET']
    method_option :output_bucket, :default => ENV['S3_MERGED_LOGS_BUCKET']
    method_option :month_year, :type => :string, :default => "201804"

    def create
      return "Logs don't exist" unless File.directory?(options[:month_year])
      return "Pipeline has events" unless Pipeline.new.is_empty?
      @log_date = get_date options[:month_year]
      @folder = options[:month_year]
      puts @log_date
      uncompress_files
      # add_bookends
      merge_files
      sort_files
    end
  end
end
data/lib/kishu/merger.rb
ADDED
@@ -0,0 +1,69 @@
require 'date'

module Kishu
  module Merger

    FILE_STEM = "DataCite-access.log"

    def get_date filename
      Date.parse("#{filename}01")
    end

    def uncompress_files
      system("gunzip #{resolution_logs_folder}/#{FILE_STEM}-*")
    end

    # def add_bookends
    #   File.delete("#{resolution_logs_folder}/#{FILE_STEM}-1-begin.log") if File.exist?("#{resolution_logs_folder}/#{FILE_STEM}-1-begin.log")
    #   File.delete("#{resolution_logs_folder}/#{FILE_STEM}-9-eof.log") if File.exist?("#{resolution_logs_folder}/#{FILE_STEM}-9-eof.log")

    #   begin_date = Date.civil(@log_date.year,@log_date.month,1).strftime("%Y-%m-%d")
    #   end_date = Date.civil(@log_date.year,@log_date.month+1, 1).strftime("%Y-%m-%d")

    #   begin_line = '0.0.0.0 HTTP:HDL "'+begin_date+' 00:00:00.000Z" 1 1 22ms 10.5281/zenodo.1043571 "300:10.admin/codata" "" "Mozilla"'+"\n"
    #   puts begin_line

    #   end_line = '0.0.0.0 HTTP:HDL "'+end_date+' 00:01:00.000Z" 1 1 22ms 10.5281/zenodo.1043571 "300:10.admin/codata" "" "Mozilla"'+"\n"
    #   puts end_line

    #   File.open("#{resolution_logs_folder}/#{FILE_STEM}-1-begin.log","w") {|f| f.write(begin_line) }
    #   File.open("#{resolution_logs_folder}/#{FILE_STEM}-9-eof.log","w") {|f| f.write(end_line) }
    # end

    def merged_file
      "#{merged_logs_folder}/datacite_resolution_logs_#{@log_date}.log"
    end

    def sorted_file
      "#{resolution_logs_folder}/datacite_resolution_logs_#{@log_date}_sorted.log"
    end

    def resolution_logs_folder
      bucket = S3_RESOLUTION_LOGS_BUCKET
      "#{bucket}#{@folder}"
    end

    def merged_logs_folder
      bucket = S3_MERGED_LOGS_BUCKET
      "#{bucket}#{@folder}"
    end

    def merge_files
      File.delete(merged_file) if File.exist?(merged_file)

      system("cat #{resolution_logs_folder}/#{FILE_STEM}-* > #{merged_file}")
      puts "Merged Completed"
    end

    def sort_files
      File.delete(sorted_file) if File.exist?(sorted_file)

      system("sort -k3 #{merged_file} > #{sorted_file}")
      puts "Sorted Completed"
      puts sorted_file
    end
  end
end
data/lib/kishu/pipeline.rb
ADDED
@@ -0,0 +1,29 @@
require 'faraday'
require 'logger'

require_relative 'utils'
require_relative 'base'

module Kishu
  class Pipeline

    def initialize
      @conn = Faraday.new(:url => LOGSTASH_HOST)
      # logger = Logger.new(STDOUT)
      # logger.info
    end

    def is_ready?
      main = @conn.get do |req|
        req.url '/_node/stats/pipelines/main'
      end
      return nil unless main.dig("pipelines","main","events","out") == 0
    end

    def is_running?
    end

  end
end
data/lib/kishu/report.rb
ADDED
@@ -0,0 +1,149 @@
require 'json'
require 'date'
require "faraday"
require 'securerandom'
require 'zlib'
require 'digest'

require_relative 'resolution_event'
require_relative 'client'

module Kishu
  class Report

    include Kishu::Base
    include Kishu::Utils

    def initialize options={}
      set_period
      @es_client = Client.new()
      @logger = Logger.new(STDOUT)
      @report_id = options[:report_id] ? options[:report_id] : ""
      @total = 0
      @aggs_size = options[:aggs_size]
      @chunk_size = options[:chunk_size]
      @after = options[:after_key] ||= ""
    end

    def report_period options={}
      es_client = Client.new()

      logdate = es_client.get_logdate({aggs_size: 1})
      puts logdate
      Date.parse(logdate)
    end

    def get_events options={}
      logger = Logger.new(STDOUT)
      es_client = Client.new()
      response = es_client.get({aggs_size: @aggs_size || 500, after_key: options[:after_key] ||= ""})
      aggs = response.dig("aggregations","doi","buckets")
      x = aggs.map do |agg|
        ResolutionEvent.new(agg,{period: @period, report_id: @report_id}).wrap_event
      end
      after = response.dig("aggregations","doi").fetch("after_key",{"doi"=>nil}).dig("doi")
      logger.info "After_key for pagination #{after}"
      y = {data: x, after: after}
      y
    end

    def generate_dataset_array
      @datasets = []
      loop do
        response = get_events({after_key: @after ||= ""})
        @datasets = @datasets.concat response[:data]
        @after = response[:after]
        @total += @datasets.size
        generate_chunk_report if @datasets.size > @chunk_size
        break if @after.nil?
      end
    end

    def compress report
      # report = File.read(hash)
      gzip = Zlib::GzipWriter.new(StringIO.new)
      string = report.to_json
      gzip << string
      body = gzip.close.string
      body
    end

    def generate_chunk_report
      # puts get_template
      # LagottoJob.perform_async(get_template(@datasets))
      file = merged_file #+ 'after_key_' + @after
      File.open(file,"w") do |f|
        f.write(JSON.pretty_generate get_template)
      end
      send_report get_template
      @datasets = []
    end

    def make_report options={}
      generate_dataset_array
      @logger.info "#{LOGS_TAG} Month of #{@period.dig("begin-date")} sent to Hub in report #{@uid} with stats for #{@total} datasets"
    end

    def set_period
      report_period
      @period = {
        "begin-date": Date.civil(report_period.year, report_period.mon, 1).strftime("%Y-%m-%d"),
        "end-date": Date.civil(report_period.year, report_period.mon, -1).strftime("%Y-%m-%d"),
      }
    end

    def send_report report, options={}
      uri = HUB_URL+'/reports'
      puts uri

      headers = {
        content_type: "application/gzip",
        content_encoding: 'gzip',
        accept: 'gzip'
      }

      body = compress(report)
      n = 0
      loop do
        request = Maremma.post(uri, data: body,
          bearer: ENV['HUB_TOKEN'],
          headers: headers,
          timeout: 100)

        @uid = request.body.dig("data","report","id")
        @logger.info "#{LOGS_TAG} Hub response #{request.status} for Report finishing in #{@after}"
        @logger.info "#{LOGS_TAG} Hub response #{@uid} for Report finishing in #{@after}"
        n += 1
        break if request.status == 201
        fail "#{LOGS_TAG} Too many attempts were tried to push this report" if n > 1
        sleep 1
      end
    end

    def get_template
      {
        "report-header": get_header,
        "report-datasets": @datasets
      }
    end

    def get_header
      {
        "report-name": "resolution report",
        "report-id": "dsr",
        release: "drl",
        created: Date.today.strftime("%Y-%m-%d"),
        "created-by": "datacite",
        "reporting-period": @period,
        "report-filters": [],
        "report-attributes": [],
        exceptions: [{code: 69, severity: "warning", message: "Report is compressed using gzip", "help-url": "https://github.com/datacite/sashimi", data: "usage data needs to be uncompressed"}]
      }
    end

  end
end