ac-summarization-utils 0.0.47.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+
2
module ACSummarizationUtils

  # A record made of two hashes: `required_fields` and `group_fields`.
  # Both are exposed read/write and can be extended one entry at a time.
  class Record

    attr_accessor :required_fields
    attr_accessor :group_fields

    # input_hash - Hash that may carry :required and/or :group_fields.
    #              Missing (or falsy) entries default to an empty Hash.
    #              nil is accepted and treated like an empty Hash.
    # The supplied hashes are stored as-is (not copied).
    def initialize(input_hash)
      input_hash ||= {}
      @required_fields = input_hash[:required] || {}
      @group_fields = input_hash[:group_fields] || {}
    end

    # Sets (or overwrites) one entry of required_fields.
    def add_required_field(name, value)
      @required_fields[name] = value
    end

    # Sets (or overwrites) one entry of group_fields.
    def add_group_field(name, value)
      @group_fields[name] = value
    end

    # Returns a new Record whose two hashes are fresh containers holding the
    # same keys and values (the values themselves are shared, not copied).
    def clone
      copy = Record.new({})
      @required_fields.each { |name, val| copy.add_required_field(name, val) }
      @group_fields.each { |name, val| copy.add_group_field(name, val) }
      copy
    end

    # Two records are equal when both hashes are equal.
    # Objects that do not expose both accessors (nil, strings, ...) are
    # simply "not equal" instead of raising NoMethodError.
    def ==(other)
      return false unless other.respond_to?(:required_fields) && other.respond_to?(:group_fields)
      @required_fields == other.required_fields && @group_fields == other.group_fields
    end

  end
end
@@ -0,0 +1,189 @@
1
+ #insert on duplicate update records to db (using table name, partitions data)
2
+ require 'mysql2'
3
+
4
require 'set' # Set collects the distinct table names below; nothing else in this file loads it

module ACSummarizationUtils
  # Builds and runs bulk INSERT statements (optionally with
  # "ON DUPLICATE KEY UPDATE") against a MySQL table, optionally spreading the
  # rows over manually partitioned tables named
  # "<table>_<prefix values...>_<partition id>".
  class ResultsDAL

    # Maps a duplicate-field action name to the MySQL function used for it.
    MYSQL_ACTION_TRANSLATOR = {"min" => "LEAST", "max" => "GREATEST" }

    class DBFatalError < RuntimeError; end
    class InputError < RuntimeError; end
    class DBUpdateError < RuntimeError
      attr_accessor :queries, :perform_rollback
    end

    # db_options        - Hash with :host, :user, :password, :db_name and
    #                     :use_transactions.
    # table_name        - base table name.
    # partition_options - nil, or a Hash with :use_partitions, :bulk_size,
    #                     :use_ids, :field_name, :num_partitions,
    #                     :partition_prefixes, :create_tables and
    #                     :create_table_columns.
    #
    # Raises DBFatalError when the connection fails, when the partition
    # arguments are invalid, or when the table does not exist and cannot be
    # created on demand.
    def initialize(db_options, table_name, partition_options)
      begin
        @mysql_client = Mysql2::Client.new({:host => db_options[:host], :username => db_options[:user], :password => db_options[:password], :database => db_options[:db_name], :reconnect => true})
      rescue StandardError => e
        # StandardError (not Exception) so signals and exit requests still propagate.
        raise DBFatalError, e.message
      end
      options = partition_options || {}
      @table_name = table_name
      @use_partitions = options[:use_partitions] || false
      @bulk_size = options[:bulk_size].nil? ? 1 : options[:bulk_size].to_i
      # A non-positive bulk size can never make progress while batching.
      @bulk_size = 1 if @bulk_size < 1
      @use_ids = options[:use_ids].nil? ? true : options[:use_ids]
      if @use_partitions
        @partition_field = options[:field_name]
        @partitions_number = options[:num_partitions].to_i
        @partition_prefixes = options[:partition_prefixes] if options[:partition_prefixes]
        @create_tables = (options[:create_tables] ? true : false)
        @create_table_query = "CREATE TABLE IF NOT EXISTS %TABLE_NAME% " + options[:create_table_columns] if options[:create_table_columns]
        raise DBFatalError, "Bad partition arguments" if @partition_field.nil? || @partitions_number.nil? || @partitions_number <= 0 || @partitions_number > 100
      else
        @partitions_number = 1
      end
      @use_transactions = db_options[:use_transactions]
      raise DBFatalError, "Results DB - table #{table_name} doesn't exist!!" unless table_exists?(table_name) || (@create_tables && @create_table_query)
    end

    # Returns true when at least one table whose name starts with table_name
    # exists. Any query failure, a nil result or an empty result yields false.
    def table_exists?(table_name)
      result = @mysql_client.query("show tables like '#{table_name}%'")
      # The original `a or b ? false : true` parsed as `a or (b ? ...)`,
      # which answered true for a nil result.
      !(result.nil? || result.size == 0)
    rescue StandardError
      false
    end

    # Inserts rows (array of hashes) using the given columns.
    #
    # duplicate_fields         - columns that are summed on duplicate key;
    #                            nil/empty means plain inserts.
    # duplicate_fields_actions - optional Hash column => "min" / "max" / Hash
    #                            with 'SUM' and 'COUNT' column names.
    #
    # Does nothing when rows or columns are nil/empty.
    # With transactions: a Mysql2::Error triggers a rollback and DBFatalError.
    # Without: raises DBUpdateError carrying every query and its status.
    def insert_rows(rows, columns ,duplicate_fields, duplicate_fields_actions=nil)
      return if rows.nil? || rows.empty? || columns.nil? || columns.empty?
      if duplicate_fields.nil? || duplicate_fields.empty?
        insert_queries, table_names = generate_unique_queries(rows, columns)
      else
        insert_queries, table_names = generate_queries(rows, columns, duplicate_fields, duplicate_fields_actions)
      end
      current_index = 0
      begin
        if @use_transactions
          @mysql_client.query("SET AUTOCOMMIT=0")
          @mysql_client.query("begin")
        end
        create_tables_if_needed(table_names) if @create_tables
        insert_queries.each_with_index do |insert_query, index|
          current_index = index # remembered so the failing query can be flagged
          @mysql_client.query(insert_query[:query])
          insert_query[:status] = "success"
        end
        @mysql_client.query("commit") if @use_transactions
      rescue Mysql2::Error => e
        if @use_transactions
          @mysql_client.query("rollback")
          raise DBFatalError, "Perfoming rollback - #{e.message}"
        else
          db_update_error = DBUpdateError.new e.message
          insert_queries[current_index][:status] = "failure"
          db_update_error.queries = insert_queries
          db_update_error.perform_rollback = false
          raise db_update_error
        end
      end
    end

    private

    # Runs the configured CREATE TABLE IF NOT EXISTS statement for each name.
    def create_tables_if_needed(table_names)
      return if @create_table_query.nil?
      table_names.each do |table_name|
        @mysql_client.query(@create_table_query.gsub('%TABLE_NAME%', table_name))
      end
    end

    # Plain bulk inserts. Returns [queries, table_names]; each query is a
    # Hash {:query => sql, :status => "not run"}.
    def generate_unique_queries(rows,columns)
      table_queries, table_names = create_insert_values_per_partition(rows, columns)
      queries = []
      ordered_buckets(table_queries).each do |data|
        bulk_statements(data, columns).each do |statement|
          queries << {:query=>"#{statement};",:status=>"not run"}
        end
      end
      return queries, table_names
    end

    # Bulk inserts with an "on duplicate key update" clause. Same return
    # shape as generate_unique_queries.
    def generate_queries(rows, columns ,duplicate_fields, duplicate_fields_actions=nil)
      duplicate_values = create_duplicate_values_statement(duplicate_fields, duplicate_fields_actions)
      table_queries, table_names = create_insert_values_per_partition(rows, columns)
      key_columns = columns - duplicate_fields
      queries = []
      ordered_buckets(table_queries).each do |data|
        # Rows are ordered by their key columns before batching.
        # NOTE(review): presumably to give concurrent writers a consistent
        # key order - confirm.
        data[:values].sort_by! { |value| key_columns.map { |col| value[:original_row][col].to_s } }
        bulk_statements(data, columns).each do |statement|
          queries << {:query=>"#{statement} on duplicate key update #{duplicate_values.join(',')};",:status=>"not run"}
        end
      end
      return queries, table_names
    end

    # Builds the assignments of the "on duplicate key update" clause:
    # every duplicate field is summed; every action entry becomes
    # LEAST/GREATEST (for "min"/"max") or a sum/count ratio built from the
    # column names stored under 'SUM' and 'COUNT'.
    def create_duplicate_values_statement(duplicate_fields,duplicate_fields_actions)
      duplicate_values = duplicate_fields.map { |f| "#{f} = #{f} + values(#{f})" }
      (duplicate_fields_actions || {}).each do |k, v|
        if MYSQL_ACTION_TRANSLATOR.has_key?(v)
          duplicate_values << "#{k} = #{MYSQL_ACTION_TRANSLATOR[v]}(#{k},values(#{k}))"
        else
          sum = "(#{v['SUM']} + values(#{v['SUM']}))"
          count = "(#{v['COUNT']} + values(#{v['COUNT']}))"
          duplicate_values << "#{k} = #{sum}/#{count}"
        end
      end
      duplicate_values
    end

    # Groups the rendered value tuples by destination table.
    #
    # Returns [table_queries, table_names]:
    #   table_queries - Hash table name => {:table_name, :partition_index, :values}
    #   table_names   - Set of partition table names (empty without partitions)
    #
    # Buckets are keyed by the full table name. Keying by partition id alone
    # sent rows with different prefix values but the same partition id into
    # whichever table was seen first.
    #
    # Raises InputError when a row has no value for the partition field.
    def create_insert_values_per_partition(rows, columns)
      table_queries = {}
      table_names = Set.new
      value_header = @use_ids ? "(NULL," : "("
      rows.each do |row|
        values_array = columns.map { |c| sql_literal(row[c]) }
        partition_index = 0
        target_table = @table_name
        if @use_partitions
          raise InputError, "Exception in manual partitioning by #{@partition_field} for row" if row[@partition_field].nil?
          partition_index = row[@partition_field].to_i % @partitions_number
          # Two-digit suffixes once there are more than ten partitions.
          partition_id = @partitions_number > 10 ? '%02d' % partition_index : partition_index
          suffix_parts = []
          if @partition_prefixes
            # A nil prefix value leaves an empty segment in the table name.
            suffix_parts = @partition_prefixes.map { |prefix| row[prefix].to_s.gsub(/\s+/, '_') unless row[prefix].nil? }
          end
          suffix_parts << partition_id
          target_table = "#{@table_name}_#{suffix_parts.join('_')}"
          table_names << target_table
        end
        bucket = (table_queries[target_table] ||= { :table_name => target_table, :partition_index => partition_index, :values => [] })
        bucket[:values] << {:value =>"#{value_header}#{values_array.join(',')})", :original_row => row}
      end
      return table_queries, table_names
    end

    # Buckets in ascending partition order (then by table name), which keeps
    # the statement order of the un-prefixed case unchanged.
    def ordered_buckets(table_queries)
      table_queries.values.sort_by { |bucket| [bucket[:partition_index], bucket[:table_name]] }
    end

    # Renders "insert into <table> (<cols>) values \n <tuples>" once per batch
    # of at most @bulk_size tuples (never less than one per batch).
    def bulk_statements(data, columns)
      id_column = @use_ids ? "(id," : "("
      header = "insert into %TABLE_NAME% #{id_column}#{columns.join(',')}) values \n".gsub('%TABLE_NAME%', data[:table_name])
      batch_size = [@bulk_size.to_i, 1].max
      data[:values].each_slice(batch_size).map do |batch|
        "#{header} #{batch.map { |v| v[:value] }.join(',')}"
      end
    end

    # Quoted SQL literal for a single value; nil becomes NULL. Every non-nil
    # value is escaped through the client, not only String instances.
    def sql_literal(value)
      return "NULL" if value.nil?
      "'#{@mysql_client.escape(value.to_s)}'"
    end

  end
end
@@ -0,0 +1,41 @@
1
+ require 'set'
2
+
3
module ACSummarizationUtils
  # Mixin with publisher revenue helpers driven by the publisher payment type.
  module RevenueCalcUtils

    # Payment type id => symbolic payment type.
    PAYMENT_TYPES = { 1 => :rev_share, 2 => :cpm, 3 => :dynamic, 4 => :geo, 5 => :fixed, 6 => :min_rpc}
    # Countries matched by is_cpm_geo? (compared after strip, case-sensitive).
    GEO_COUNTRIES = Set.new ["us"]

    # Returns pub_rev_share * revenue when should_calc_pub_revenue? holds for
    # the payment type / country, otherwise 0.
    def calc_publisher_click_revenue(revenue, pub_rev_share, pub_payment_type, country)
      return 0 unless should_calc_pub_revenue?(pub_payment_type, country)
      pub_rev_share * revenue
    end

    # Returns (pub_cpm_share * imp_count) / 1000 when the payment type is
    # :dynamic or when should_calc_pub_revenue? does NOT hold; otherwise 0.
    #
    # The division is done in floating point: with two Integer inputs the
    # previous integer division silently dropped the fractional revenue
    # (e.g. 5 * 100 / 1000 gave 0 instead of 0.5).
    def calc_publisher_cpm_revenue(imp_count, pub_cpm_share, pub_payment_type, country)
      cpm_applies = !should_calc_pub_revenue?(pub_payment_type, country) || PAYMENT_TYPES[pub_payment_type] == :dynamic
      return 0 unless cpm_applies
      (pub_cpm_share * imp_count) / 1000.0
    end

    # Returns cpc when country (stripped) is one of GEO_COUNTRIES, else 0.
    # A nil country yields 0.
    def calc_publisher_us_click_revenue(cpc,country)
      is_cpm_geo?(country) ? cpc : 0
    end

    # True when country (stripped) is one of GEO_COUNTRIES; false for nil.
    def is_cpm_geo?(country)
      return false if country.nil?
      GEO_COUNTRIES.include?(country.strip)
    end

    # True for :rev_share, :dynamic, :fixed and :min_rpc payment types, and for
    # :geo when the country is not one of GEO_COUNTRIES. False otherwise
    # (including :cpm and unknown payment type ids).
    def should_calc_pub_revenue?(pub_payment_type, country)
      case PAYMENT_TYPES[pub_payment_type]
      when :rev_share, :dynamic, :fixed, :min_rpc then true
      when :geo then !is_cpm_geo?(country)
      else false
      end
    end

  end
end
@@ -0,0 +1,119 @@
1
+ require_relative 'record'
2
+ require_relative 'lookup_cache'
3
+ require_relative 'errors_handler'
4
+
5
require 'time' # Time.parse (used by get_date) lives in the stdlib 'time' extension

module ACSummarizationUtils
  # Mixin with helpers for turning raw messages into records and for grouping
  # those records with aggregate functions (sum, count, minimum, maximum,
  # average).
  module SummUtils

    # Connects the lookup cache and builds the results DAL.
    # Returns [lookup_cache, results_dal].
    def initialize_resources(redis_options, lookup_db_options, results_db_options, db_partition_field, db_partition_prefixes=nil)
      lookup_cache = LookupCache.connect(redis_options, lookup_db_options)
      db_options = {
        :host => results_db_options[:host],
        :user => results_db_options[:user],
        :password => results_db_options[:password],
        :db_name => results_db_options[:name],
        :use_transactions => results_db_options[:use_transactions]
      }
      partition_options = {
        :use_partitions => results_db_options[:use_manual_partitions],
        :field_name => db_partition_field,
        :num_partitions => results_db_options[:num_partitions],
        :bulk_size => results_db_options[:bulk_size],
        :create_tables => results_db_options[:create_tables],
        :create_table_columns => results_db_options[:create_table_columns],
        :partition_prefixes => db_partition_prefixes
      }
      results_dal = ResultsDAL.new(db_options, results_db_options[:table_name], partition_options)

      return lookup_cache, results_dal
    end

    # Calls the block once per message (with its keys downcased) and collects
    # the arrays of records it returns.
    #
    # A message whose block raises LookupCache::InputError or ArgumentError is
    # turned into an error message via error_handler.generate_error_message.
    #
    # Returns [records, input_error_messages], or nil when messages is
    # nil/empty. The logger argument is currently unused.
    def create_records(logger, error_handler, messages ,&block)
      return unless messages && messages.size > 0
      records = []
      input_error_messages = []
      messages.each do |m|
        begin
          records.concat(block.call(downcase_hash(m)))
        rescue LookupCache::InputError, ArgumentError => e
          input_error_messages << error_handler.generate_error_message(m, e)
        end
      end
      return records, input_error_messages
    end

    # Formats an epoch (Integer) or a parseable time String as "YYYY-MM-DD"
    # in the process-local time zone. nil is passed through.
    #
    # Raises InputError when the value cannot be converted.
    # NOTE(review): InputError is not defined in this file; presumably it is
    # provided by one of the required files - confirm.
    def get_date(date)
      return nil if date.nil?
      begin
        # Integer instead of Fixnum: Fixnum was removed in Ruby 3.2.
        time_object = date.is_a?(Integer) ? Time.at(date) : Time.parse(date)
        return time_object.strftime("%Y-%m-%d")
      rescue StandardError => e
        raise InputError, "Unable to parse time string: #{date} - message: #{e.message}"
      end
    end

    # Returns a new hash with every key downcased; nil is passed through.
    def downcase_hash(input)
      return nil if input.nil?
      input.each_with_object({}) { |(k, v), result| result[k.downcase] = v }
    end

    # Expands every record once per value in expanded_field_values: the
    # record itself receives the first value under `field`, and one clone per
    # remaining value is appended after all original records.
    # Returns current_records untouched when any argument is nil/empty.
    def duplicate_records(current_records, field, expanded_field_values)
      return current_records if current_records.nil? || current_records.size == 0 || field.nil? || expanded_field_values.nil? || expanded_field_values.size == 0
      additional_records = []
      current_records.each do |new_record|
        new_record.add_required_field(field, expanded_field_values.first)
        expanded_field_values[1..-1].each do |expanded_value|
          cloned_record = new_record.clone
          cloned_record.add_required_field(field, expanded_value)
          additional_records << cloned_record
        end
      end
      current_records + additional_records
    end

    # Groups records that share the same required_fields and applies every
    # group operation to each group.
    #
    # group_options is an array of this form:
    #   [ {:output_field_name => "", :method => "", :input_field_name => ""} ]
    # example:
    #   [ {:output_field_name => "spend", :method => method(:sum), :input_field_name => "cpc"} ]
    #
    # Returns one hash per group: the required fields plus the aggregates.
    def summarize_records(records, group_options)
      return [] if records.nil? || records.empty?
      unified_records = {}
      records.each do |record|
        unified_key = record.required_fields.to_a.join(";")
        # Accumulate into a copy so the aggregates are not written into the
        # input record's own required_fields hash.
        unified_records[unified_key] ||= record.required_fields.dup
        group_options.each do |group_operation|
          group_operation[:method].call(unified_records[unified_key], record, group_operation[:input_field_name], group_operation[:output_field_name])
        end
      end
      unified_records.values
    end

    # Adds the record's group field (as float) to the output field.
    def sum(unified_record, record, input_field_name, output_field_name)
      value = fetch_group_value(record, input_field_name)
      unified_record[output_field_name] ||= 0
      unified_record[output_field_name] += value.to_f
    end

    # Increments the output field when the record's group field is truthy.
    def count(unified_record, record, input_field_name, output_field_name)
      value = fetch_group_value(record, input_field_name)
      unified_record[output_field_name] ||= 0
      unified_record[output_field_name] += 1 if value
    end

    # Keeps the smallest group field value (as float) in the output field.
    # The first value seen seeds the minimum; the previous fixed seed of 1000
    # capped the result for groups whose values were all larger.
    def minimum(unified_record, record, input_field_name, output_field_name)
      value = fetch_group_value(record, input_field_name).to_f
      current = unified_record[output_field_name]
      unified_record[output_field_name] = current.nil? ? value : [current.to_f, value].min
    end

    # Keeps the largest group field value (as float) in the output field.
    # The first value seen seeds the maximum; the previous fixed seed of 0
    # gave 0 for groups whose values were all negative.
    def maximum(unified_record, record, input_field_name, output_field_name)
      value = fetch_group_value(record, input_field_name).to_f
      current = unified_record[output_field_name]
      unified_record[output_field_name] = current.nil? ? value : [current.to_f, value].max
    end

    # Maintains "<output>_sum" and "<output>_count" and stores their ratio
    # in the output field (0.0 while the count is zero).
    def average(unified_record, record, input_field_name, output_field_name)
      fetch_group_value(record, input_field_name)
      unified_record[output_field_name] ||= 0
      sum_field = output_field_name + "_sum"
      count_field = output_field_name + "_count"
      sum(unified_record, record, input_field_name, sum_field)
      count(unified_record, record, input_field_name, count_field)
      unified_record[output_field_name] = unified_record[count_field] > 0 ? (unified_record[sum_field] / unified_record[count_field]) : 0.0
    end

    private

    # Returns the record's group field, raising ArgumentError when it is nil.
    def fetch_group_value(record, input_field_name)
      value = record.group_fields[input_field_name]
      raise ArgumentError, "Unsupported field name for group function: #{input_field_name}" if value.nil?
      value
    end

  end
end
@@ -0,0 +1,93 @@
1
+
2
module ACSummarizationUtils
  # Mixin with validity predicates for incoming event messages (hashes with
  # string keys). Each predicate raises ArgumentError when a field it requires
  # is missing and otherwise returns true/false.
  module ValidationUtils

    ALLOWED_CLICK_TYPES = [1, 2, 5, 6, 1005]
    REAL_CLICKS = [1, 3, 5, 11, 107, 108, 801, 805]
    INVALID_CLICK_TYPE = -1
    CPC = 1
    CPV = 3

    # Shared click checks. Returns [click_type, valid].
    #
    # click_type is read from "clicktype" (falling back to "ct"), valid from
    # "valid" (falling back to "validitytype").
    # Returns [INVALID_CLICK_TYPE, -1] without further checks when the click
    # type is blank and "clickurl" is blank or 'NULL'.
    # Raises ArgumentError when click type, valid, "ad_id", "advertiser_id" or
    # "cpc" is missing, or when "ad_id" is not an optionally signed integer.
    def common_click_validations(message)
      click_type = message["clicktype"].nil? ? message["ct"] : message["clicktype"]
      click_url = message["clickurl"]
      return [INVALID_CLICK_TYPE, -1] if click_type.to_s.empty? && (click_url.to_s.empty? || click_url == 'NULL')
      raise ArgumentError, "message required field click_type not ct doesn't exist" if click_type.nil?
      valid = message["valid"].nil? ? message["validitytype"] : message["valid"]
      raise ArgumentError, "message required field valid not validitytype doesn't exist" if valid.nil?
      ensure_fields_present(message, ["ad_id","advertiser_id","cpc"])
      # \A and \z anchor the whole string; ^ and $ only anchor a line, so a
      # value such as "12\nabc" used to pass.
      raise ArgumentError, "invalid ad id: #{message["ad_id"]}" unless message["ad_id"].to_s =~ /\A[-]?[0-9]+\z/
      [click_type, valid]
    end

    # True when the click type is one of REAL_CLICKS (or 1005 for a CPC
    # campaign), the ad id is not -1, the advertiser id is 8 and valid is 1.
    def is_valid_real_click?(message, campaign_type)
      click_type, valid = common_click_validations(message)
      type = click_type.to_i
      return false unless REAL_CLICKS.include?(type) || (type == 1005 && campaign_type.to_i == CPC)
      message["ad_id"].to_i != -1 && message["advertiser_id"].to_i == 8 && valid.to_i == 1
    end

    # True when the ad id is not -1, the advertiser id is 8, valid is 1, cpc is
    # non-negative and the click type is one of ALLOWED_CLICK_TYPES.
    def is_valid_direct_click?(message)
      click_type, valid = common_click_validations(message)
      message["ad_id"].to_i != -1 && message["advertiser_id"].to_i == 8 && valid.to_i == 1 &&
        message["cpc"].to_f >= 0.0 && ALLOWED_CLICK_TYPES.include?(click_type.to_i)
    end

    # Requires "ad_id" and "advertiserid"; true when the ad id is not -1 and
    # the advertiser id is 8.
    def is_valid_direct_layer_impr?(message)
      ensure_fields_present(message, ["ad_id","advertiserid"])
      message["ad_id"].to_i != -1 && message["advertiserid"].to_i == 8
    end

    # Requires "adid", "conversioneventid" and "publisherid"; true when the ad
    # id is not -1 and the conversion event id is positive.
    def is_valid_conversion?(message)
      ensure_fields_present(message, ["adid","conversioneventid","publisherid"])
      message["adid"].to_i != -1 && message["conversioneventid"].to_i > 0
    end

    # Requires "cid"; true when it is not -1.
    def is_valid_dynamic_content_event?(message)
      ensure_fields_present(message, ["cid"])
      message["cid"].to_i != -1
    end

    # Requires "ad_id", "advertiserid" and "cpc"; true when cpc is non-negative.
    def is_valid_layer_impr?(message)
      ensure_fields_present(message, ["ad_id","advertiserid","cpc"])
      message["cpc"].to_f >= 0.0
    end

    # Runs the common click checks, requires "publisher_id" or "p", and is
    # true when valid is 1, cpc is non-negative and the click type is one of
    # ALLOWED_CLICK_TYPES.
    def is_valid_click?(message)
      click_type, valid = common_click_validations(message)
      publisher_field = message["publisher_id"] || message["p"]
      raise ArgumentError, "message required field 'publisher_id' or 'p' doesn't exist" if publisher_field.nil?

      valid.to_i == 1 && message["cpc"].to_f >= 0.0 && ALLOWED_CLICK_TYPES.include?(click_type.to_i)
    end

    # Requires "pid" and "itid"; true when "itid" is 3.
    def is_valid_impression?(message)
      ensure_fields_present(message, ["pid", "itid"])
      message["itid"].to_i == 3
    end

    # Requires "value" and, inside it, "account_id"; true when the account id
    # is not the empty string.
    def is_valid_inimage_report?(message)
      ensure_fields_present(message, ["value"])
      value = message["value"]
      ensure_fields_present(value, ["account_id"])
      value["account_id"] != ""
    end

    private

    # Raises ArgumentError for the first listed field that is nil in fields.
    def ensure_fields_present(fields, field_names)
      field_names.each do |field_name|
        raise ArgumentError, "message required field '#{field_name}' doesn't exist" if fields[field_name].nil?
      end
    end

  end
end