kishu 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ require 'faraday'
2
+ require 'logger'
3
+ require 'maremma'
4
+ require 'sucker_punch'
5
+
6
+ require_relative 'utils'
7
+ require_relative 'base'
8
+ require_relative 'lagotto_job'
9
+
10
module Kishu
  # Transforms one Elasticsearch resolution aggregation bucket into the
  # SUSHI/COUNTER-style per-dataset performance structure used in
  # resolution reports.
  class ResolutionEvent

    include Kishu::Utils

    # @param event [Hash] one aggregation bucket; expected to contain
    #   "totale" and "unique" sub-aggregations plus a
    #   "key" => {"doi" => ...} entry — assumed shape, TODO confirm
    #   against the query that produces it.
    # @param options [Hash] :period is the reporting-period hash echoed
    #   verbatim into the generated performance entry.
    def initialize(event, options = {})
      @event = event
      @logger = Logger.new(STDOUT)
      @period = options[:period]
    end

    # Builds the performance hash for this event's DOI. Metric instances
    # whose count is below 1 are dropped from the output.
    #
    # @return [Hash] {"dataset-id" => [...], "performance" => [...]}
    def wrap_event
      totale = @event.dig("totale").fetch("buckets", [])
      unique = @event.dig("unique").fetch("buckets", [])

      # Buckets are keyed by access method; match? avoids allocating a
      # MatchData when only a boolean is needed.
      unique_regular = unique.find_all { |bucket| bucket.fetch('key', "").match?('regular') }
      unique_machine = unique.find_all { |bucket| bucket.fetch('key', "").match?('machine') }
      total_regular  = totale.find_all { |bucket| bucket.fetch('key', "").match?('regular') }
      total_machine  = totale.find_all { |bucket| bucket.fetch('key', "").match?('machine') }

      dataset = {
        doi: @event.dig("key", "doi"),
        # size of an empty array is already 0, so no empty? guard is needed
        unique_counts_regular: unique_regular.size,
        unique_counts_machine: unique_machine.size,
        # `|| 0` also covers a matching bucket without "doc_count", which
        # previously produced nil and crashed the `< 1` comparison below
        total_counts_regular: total_regular.dig(0, "doc_count") || 0,
        total_counts_machine: total_machine.dig(0, "doc_count") || 0
      }

      @doi = dataset.fetch(:doi, nil)

      instances = [
        {
          "count" => dataset.fetch(:total_counts_regular),
          "access-method" => "regular",
          "metric-type" => "total-resolutions"
        },
        {
          "count" => dataset.fetch(:unique_counts_regular),
          "access-method" => "regular",
          "metric-type" => "unique-resolutions"
        },
        {
          "count" => dataset.fetch(:unique_counts_machine),
          "access-method" => "machine",
          "metric-type" => "unique-resolutions"
        },
        {
          "count" => dataset.fetch(:total_counts_machine),
          "access-method" => "machine",
          "metric-type" => "total-resolutions"
        },
      ]

      # Zero-count metrics are not reported
      instances.delete_if { |instance| instance.dig("count") < 1 }

      {
        "dataset-id" => [{ "type" => "doi", "value" => dataset.fetch(:doi, nil) }],
        "performance" => [{
          "period" => @period,
          "instance" => instances
        }]
      }
    end

  end
end
@@ -0,0 +1,24 @@
1
+
2
+ require 'aws-sdk-s3'
3
+
4
+ require_relative 'utils'
5
+ require_relative 'base'
6
+
7
+ module Kishu
8
+ class S3
9
+
10
+ def initialize
11
+ s3 = Aws::S3::Client.new
12
+ resp = s3.list_buckets
13
+ resp.buckets.map(&:name)
14
+ end
15
+
16
+ def download_logs
17
+ resp = s3.get_object(
18
+ response_target: '/logs',
19
+ bucket: S3_RESOLUTION_LOGS_BUCKET,
20
+ key: 'object-key')
21
+ resp.metadata
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,59 @@
1
+
2
+ require 'thor'
3
+
4
+
5
+ require_relative 'resolution_event'
6
+ require_relative 'report'
7
+ require_relative 'utils'
8
+ require_relative 'base'
9
+
10
module Kishu
  # Thor command-line interface for building and shipping resolution
  # (SUSHI) reports.
  class Sushi < Thor

    include Kishu::Base
    include Kishu::Utils

    desc "get sushi", "get resolution report"
    method_option :aggs_size, :type => :numeric, :default => 1000
    method_option :month_year, :type => :string, :default => "2018-04"
    # Generates a full resolution report for the given month.
    def get
      report = Report.new
      report.make_report(options)
    end

    # Every Thor command needs a desc; without one the command is hidden
    # from `help` and newer Thor versions emit a warning.
    desc "continue_report sushi", "resume report file generation"
    method_option :month_year, :type => :string, :default => "2018-04"
    method_option :after_key, :type => :string
    # Resumes report file generation from a previous aggregation cursor.
    def continue_report
      report = Report.new
      report.generate_files(options)
    end

    desc "clean_all sushi", "clean index"
    method_option :month_year, :type => :string, :default => "2018-04"
    method_option :after_key, :type => :string
    # Clears the working index.
    # NOTE(review): Client is not required by this file — confirm it is
    # loaded elsewhere before this command is invoked.
    def clean_all
      client = Client.new
      client.clear_index
    end

    desc "send_report_events sushi", "send_report_events index"
    method_option :month_year, :type => :string, :default => "2018-04"
    method_option :after_key, :type => :string
    method_option :chunk_size, :type => :numeric, :default => 40000
    method_option :aggs_size, :type => :numeric, :default => 500
    # Builds the report and pushes its events to the hub; requires the
    # HUB_TOKEN JWT to be configured.
    def send_report_events
      fail "You need to set your JWT" if HUB_TOKEN.blank?
      report = Report.new(options)
      report.make_report(options)
    end

  end
end
@@ -0,0 +1,124 @@
1
+ require 'faraday'
2
+ require 'logger'
3
+ require 'maremma'
4
+
5
+ require_relative 'utils'
6
+ require_relative 'base'
7
+
8
module Kishu
  # Wraps a usage aggregation bucket into a COUNTER-style per-dataset
  # performance entry, intended to be enriched with DOI metadata
  # (the metadata fetch is currently stubbed — see NOTE in wrap_event).
  class UsageEvent

    API_URL = "https://api.datacite.org"

    # Builds the performance hash for one aggregation bucket.
    #
    # @param event [Hash] expected to hold "totale" and "unique"
    #   sub-aggregations, a "key" => {"doi" => ...} entry and a
    #   "doc_count" — assumed shape, TODO confirm against the query.
    # @return [Hash] dataset performance entry; metadata-derived fields
    #   currently resolve to nil/"".
    def wrap_event(event)
      puts "------------------ \n"
      totale = event.dig("totale").fetch("buckets", [])
      unique = event.dig("unique").fetch("buckets", [])

      # Buckets are keyed by access method; match? avoids allocating a
      # MatchData when only a boolean is needed.
      unique_regular = unique.find_all { |bucket| bucket.fetch('key', "").match?('regular') }
      unique_machine = unique.find_all { |bucket| bucket.fetch('key', "").match?('machine') }
      total_regular  = totale.find_all { |bucket| bucket.fetch('key', "").match?('regular') }
      total_machine  = totale.find_all { |bucket| bucket.fetch('key', "").match?('machine') }

      dataset = {
        doi: event.dig("key", "doi"),
        unique_counts_regular: unique_regular.empty? ? 0 : unique_regular.size,
        unique_counts_machine: unique_machine.empty? ? 0 : unique_machine.size,
        total_counts_regular: total_regular.empty? ? 0 : total_regular.dig(0, "doc_count"),
        total_counts_machine: total_machine.empty? ? 0 : total_machine.dig(0, "doc_count")
      }

      logger = Logger.new(STDOUT)
      logger.info event.fetch("doc_count")
      logger.info dataset

      # NOTE(review): metadata retrieval from the API is disabled, so the
      # attribute-derived fields below are all nil/"". Restore a real
      # fetch (e.g. via Maremma against API_URL) before relying on them.
      attributes = {}
      resource_type = ""

      instances = [
        {
          count: dataset.fetch(:total_counts_regular),
          "access-method": "regular",
          "metric-type": "total-resolutions"
        },
        {
          count: dataset.fetch(:unique_counts_regular),
          "access-method": "regular",
          "metric-type": "unique-resolutions"
        },
        {
          count: dataset.fetch(:unique_counts_machine),
          "access-method": "machine",
          "metric-type": "unique-resolutions"
        },
        {
          count: dataset.fetch(:total_counts_machine),
          "access-method": "machine",
          "metric-type": "total-resolutions"
        },
      ]

      # Zero-count metrics are not reported
      instances.delete_if { |instance| instance.dig(:count) <= 0 }

      instanced = {
        "dataset-id" => [{ type: "doi", value: dataset.fetch(:doi, nil) }],
        "data-type" => resource_type,
        yop: attributes.fetch("published", nil),
        uri: attributes.fetch("identifier", nil),
        publisher: attributes.fetch("container-title", nil),
        "dataset-title": attributes.fetch("title", nil),
        "publisher-id": [{
          type: "client-id",
          value: attributes.fetch("data-center-id", nil)
        }],
        "dataset-dates": [{
          type: "pub-date",
          value: attributes.fetch("published", nil)
        }],
        "dataset-contributors": attributes.fetch("author", []).map { |a| get_authors(a) },
        platform: "datacite",
        performance: [{
          period: @period, # NOTE(review): never assigned in this class — always nil
          instance: instances
        }]
      }
      logger.info instanced

      instanced
    end

    # Maps a citeproc-style author hash to a {type:, value:} name pair;
    # falls back to an empty name when neither given/family nor literal
    # is present.
    def get_authors author
      if (author.key?("given") && author.key?("family"))
        { type: "name",
          value: author.fetch("given", nil) + " " + author.fetch("family", nil) }
      elsif author.key?("literal")
        { type: "name",
          value: author.fetch("literal", nil) }
      else
        { type: "name",
          value: "" }
      end
    end

  end
end
@@ -0,0 +1,115 @@
1
+ require "bolognese"
2
+ require "time"
3
+
4
module Kishu
  # Shared helpers for building, serializing and shipping resolution
  # reports; mixed into the report/CLI classes. Relies on the including
  # class for `report_period`, `get_header` and `compress_merged_file`,
  # and on externally defined constants (SOURCE_TOKEN, LICENSE, API_URL).
  module Utils
    include ::Bolognese::MetadataUtils

    # Removes the temporary per-chunk dataset files produced while a
    # report is being generated.
    def clean_tmp
      system("rm tmp/datasets-*.json")
      puts "/tmp Files deleted"
    end

    # Path of the merged (concatenated) report for the current period.
    def merged_file
      "reports/datacite_resolution_report_#{report_period.strftime("%Y-%m")}_2.json"
    end

    # Path of the encoded report for the current period.
    def encoded_file
      "reports/datacite_resolution_report_#{report_period.strftime("%Y-%m")}_encoded.json"
    end

    # Writes the JSON header and footer fragments that frame the dataset
    # chunk files; the 00/zz99 filename prefixes make a lexicographic
    # concatenation of tmp/datasets-* produce a valid JSON document.
    def generate_header_footer
      report_header = '{"report-header": '+get_header.to_json.to_s+',"report-datasets": [ '+"\n"

      File.open("tmp/datasets-00-report-header.json","w") do |f|
        f.write(report_header)
      end
      report_footer = ']'+"\n"+'}'

      File.open("tmp/datasets-zz99-report-footer.json","w") do |f|
        f.write(report_footer)
      end
    end

    # Maps a citeproc-style author hash to a {type:, value:} name pair;
    # falls back to an empty name when neither given/family nor literal
    # is present.
    def get_authors author
      if (author.key?("given") && author.key?("family"))
        { type: "name",
          value: author.fetch("given",nil)+" "+author.fetch("family",nil) }
      elsif author.key?("literal")
        { type: "name",
          value: author.fetch("literal",nil) }
      else
        { type: "name",
          value: "" }
      end
    end

    # Builds one Event Data "events" payload for a single metric instance.
    # options[:report_id] becomes the subject, options[:dataset_id] the
    # object; the relation type is derived from the metric/access fields.
    def format_instance data, options={}
      obj = get_metadata(options[:dataset_id])
      subj = {id:options[:report_id]}
      relation_type = "#{data[:"metric-type"]}-#{data[:"access-method"]}"
      source_id = "datacite-resolution"
      source_token = SOURCE_TOKEN
      {
        "data" => {
          "type" => "events",
          "attributes" => {
            "message-action" => "create",
            "subj-id" => options[:report_id],
            "total" => data[:count],
            "obj-id" => options[:dataset_id],
            "relation-type-id" => relation_type.to_s.dasherize,
            "source-id" => source_id,
            "source-token" => source_token,
            "occurred-at" => Time.now.iso8601, # need modify — should reflect the event's own time, not "now"
            "timestamp" => Time.now.iso8601,
            "license" => LICENSE,
            "subj" => subj,
            "obj" => obj } }}
    end

    # Fetches DOI metadata from the DataCite API and reduces it to the
    # schema.org-ish hash embedded in events. Returns {} when the DOI
    # cannot be derived or the API call fails.
    def get_metadata id
      doi = doi_from_url(id)
      return {} unless doi.present?

      url = API_URL + "/dois/#{doi}"
      response = Maremma.get(url)
      return {} if response.status != 200

      attributes = response.body.dig("data", "attributes")
      relationships = response.body.dig("data", "relationships")

      # NOTE(review): this assigns the whole relationships hash, so the
      # CR_TO_SO lookup below can never match and always falls through —
      # likely a copy-paste bug; confirm the intended source of this value.
      resource_type = response.body.dig("data", "relationships")
      resource_type_general = relationships.dig("resource-type", "data", "id")
      type = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.underscore.camelcase(first_letter = :upper)] || "CreativeWork"
      author = Array.wrap(attributes["author"]).map do |a|
        {
          "given_name" => a["givenName"],
          "family_name" => a["familyName"],
          # keep "name" only when no structured family name exists
          "name" => a["familyName"].present? ? nil : a["name"] }.compact
      end
      client_id = relationships.dig("client", "data", "id")

      {
        "id" => id,
        "type" => type.underscore.dasherize,
        "name" => attributes["title"],
        "author" => author,
        "publisher" => attributes["publisher"],
        "version" => attributes["version"],
        "date_published" => attributes["published"],
        "date_modified" => attributes["updated"],
        "registrant_id" => "datacite.#{client_id}" }.compact
    end

    # Base64-encodes the gzipped merged report for transmission.
    def encoded
      Base64.strict_encode64(compress_merged_file)
    end

    # SHA-256 checksum of the gzipped merged report.
    def checksum
      Digest::SHA256.hexdigest(compress_merged_file)
    end

  end
end
@@ -0,0 +1,3 @@
1
module Kishu
  # Gem version; frozen so the constant's string cannot be mutated at
  # runtime.
  VERSION = "0.0.0".freeze
end
@@ -0,0 +1,71 @@
1
+
2
FactoryBot.define do
  factory :resolution_event do
    period "begin_date": "2018-03-01", "end_date": "2018-03-31"
    # The sample bucket must be supplied via a block: previously the hash
    # literal started on the line after `event`, so Ruby parsed it as a
    # separate, discarded expression and the attribute was never set.
    # NOTE(review): fixture uses "total"/"access_method" keys while the
    # code reads "totale"/"unique" — confirm which matches the real
    # aggregation response.
    event do
      {
        "key": "10.5065/D6V1236Q",
        "doc_count": 5566,
        "total": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "machine",
              "doc_count": 5093
            },
            {
              "key": "regular",
              "doc_count": 473
            }
          ]
        },
        "access_method": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "machine",
              "doc_count": 5093,
              "session": {
                "doc_count_error_upper_bound": 10,
                "sum_other_doc_count": 5072,
                "buckets": [
                  {
                    "key": "2018-09-18_16_10.5065/D6V1236Q_54.71.12.185_curl/7.38.0",
                    "doc_count": 5
                  },
                  {
                    "key": "2018-09-01_05_10.5065/D6V1236Q_45.79.139.170_curl/7.38.0",
                    "doc_count": 4
                  },
                  {
                    "key": "2018-09-03_16_10.5065/D6V1236Q_52.40.104.81_curl/7.38.0",
                    "doc_count": 4
                  },
                  {
                    "key": "2018-09-12_00_10.5065/D6V1236Q_52.39.7.168_curl/7.38.0",
                    "doc_count": 4
                  },
                  {
                    "key": "2018-09-26_06_10.5065/D6V1236Q_52.39.7.168_curl/7.38.0",
                    "doc_count": 4
                  }
                ]
              },
              # NOTE(review): "unqiue" looks like a typo for "unique" —
              # kept as-is in case it mirrors the real aggregation name.
              "unqiue": {
                "value": 3084
              }
            }
          ]
        }
      }
    end
  end

  factory :usage_event do
  end

  factory :report do
  end
end