ac-summarization-utils 0.0.47.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,40 @@
1
+
2
module ACSummarizationUtils
  # A record made of two hashes: +required_fields+ and +group_fields+.
  # Both are exposed through accessors and can be extended field by field.
  class Record
    attr_accessor :required_fields
    attr_accessor :group_fields

    # input_hash - optional Hash; its :required and :group_fields entries
    #              (when truthy) become the record's hashes. The hashes are
    #              stored as given, not copied. A nil or omitted argument
    #              produces an empty record.
    def initialize(input_hash = {})
      input_hash ||= {}
      @required_fields = input_hash[:required] || {}
      @group_fields = input_hash[:group_fields] || {}
    end

    # Sets (or overwrites) one entry of required_fields.
    def add_required_field(name, value)
      @required_fields[name] = value
    end

    # Sets (or overwrites) one entry of group_fields.
    def add_group_field(name, value)
      @group_fields[name] = value
    end

    # Returns a new record of the same class whose two hashes are fresh
    # containers holding the same keys and values (values are not copied).
    def clone
      cloned_record = self.class.new({})
      @required_fields.each { |name, val| cloned_record.add_required_field(name, val) }
      @group_fields.each { |name, val| cloned_record.add_group_field(name, val) }
      cloned_record
    end

    # Two records are equal when both hashes are equal. Objects that do not
    # expose both accessors (nil, strings, ...) compare as not equal instead
    # of raising NoMethodError.
    def ==(other)
      return false unless other.respond_to?(:required_fields) && other.respond_to?(:group_fields)

      @required_fields == other.required_fields && @group_fields == other.group_fields
    end
  end
end
@@ -0,0 +1,189 @@
1
+ #insert on duplicate update records to db (using table name, partitions data)
2
+ require 'mysql2'
3
+
4
require 'set' # Set is used below; older Rubies do not autoload it

module ACSummarizationUtils
  # Writes rows to a MySQL results table, either as plain inserts or as
  # "insert ... on duplicate key update" statements. Rows can be spread over
  # manually partitioned tables named "<table>_<prefixes...>_<partition id>"
  # and are sent in bulks of @bulk_size rows per statement.
  class ResultsDAL

    # Maps a duplicate-field action name to the MySQL function implementing it.
    MYSQL_ACTION_TRANSLATOR = {"min" => "LEAST", "max" => "GREATEST" }

    class DBFatalError < RuntimeError; end
    class InputError < RuntimeError; end
    # Raised when an insert fails outside a transaction; +queries+ carries
    # every generated query together with its :status.
    class DBUpdateError < RuntimeError
      attr_accessor :queries, :perform_rollback
    end

    # db_options        - Hash with :host, :user, :password, :db_name and
    #                     :use_transactions.
    # table_name        - base table name.
    # partition_options - Hash (or nil) with :use_partitions, :field_name,
    #                     :num_partitions, :partition_prefixes, :bulk_size,
    #                     :use_ids, :create_tables, :create_table_columns.
    #
    # Raises DBFatalError when the connection fails, the partition arguments
    # are invalid, or the table is missing and cannot be created on demand.
    def initialize(db_options, table_name, partition_options)
      begin
        @mysql_client = Mysql2::Client.new({:host => db_options[:host], :username => db_options[:user], :password => db_options[:password], :database => db_options[:db_name], :reconnect => true})
      rescue StandardError => e
        raise DBFatalError, e.message
      end
      options = partition_options || {}
      @table_name = table_name
      @use_partitions = partition_options.nil? ? false : options[:use_partitions]
      # A bulk size below 1 (e.g. a non-numeric string converted by to_i)
      # would never make progress, so it is clamped to one row per statement.
      @bulk_size = options[:bulk_size].nil? ? 1 : [options[:bulk_size].to_i, 1].max
      @use_ids = options[:use_ids].nil? ? true : options[:use_ids]
      if @use_partitions
        @partition_field = options[:field_name]
        @partitions_number = options[:num_partitions].to_i
        @partition_prefixes = options[:partition_prefixes] if options[:partition_prefixes]
        @create_tables = (options[:create_tables] ? true : false)
        @create_table_query = "CREATE TABLE IF NOT EXISTS %TABLE_NAME% " + options[:create_table_columns] if options[:create_table_columns]
        raise DBFatalError, "Bad partition arguments" if @partition_field.nil? || @partitions_number <= 0 || @partitions_number > 100
      else
        @partitions_number = 1
      end
      @use_transactions = db_options[:use_transactions]
      raise DBFatalError, "Results DB - table #{table_name} doesn't exist!!" unless table_exists?(table_name) || (@create_tables && @create_table_query)
    end

    # Returns true when at least one table whose name starts with table_name
    # exists. A nil result, an empty result or any query error yields false.
    def table_exists?(table_name)
      result = @mysql_client.query("show tables like '#{table_name}%'")
      !result.nil? && result.size > 0
    rescue StandardError
      false
    end

    # Inserts rows into the results table(s).
    #
    # rows                     - Array of Hashes keyed by column name.
    # columns                  - Array of column names to write.
    # duplicate_fields         - columns that are summed on duplicate key;
    #                            nil/empty means plain inserts.
    # duplicate_fields_actions - optional Hash column => "min" / "max" /
    #                            {"SUM" => col, "COUNT" => col}.
    #
    # Returns nil without touching the DB when rows or columns are empty.
    # On a Mysql2::Error: with transactions a rollback is issued and
    # DBFatalError raised; otherwise DBUpdateError is raised with the query
    # list, the query being processed marked as "failure".
    def insert_rows(rows, columns, duplicate_fields, duplicate_fields_actions = nil)
      return if rows.nil? || rows.empty? || columns.nil? || columns.empty?
      insert_queries, table_names =
        if duplicate_fields.nil? || duplicate_fields.empty?
          generate_unique_queries(rows, columns)
        else
          generate_queries(rows, columns, duplicate_fields, duplicate_fields_actions)
        end
      current_index = 0
      begin
        if @use_transactions
          @mysql_client.query("SET AUTOCOMMIT=0")
          @mysql_client.query("begin")
        end
        create_tables_if_needed(table_names) if @create_tables
        insert_queries.each_with_index do |insert_query, index|
          current_index = index
          @mysql_client.query(insert_query[:query])
          insert_query[:status] = "success"
        end
        @mysql_client.query("commit") if @use_transactions
      rescue Mysql2::Error => e
        if @use_transactions
          @mysql_client.query("rollback")
          raise DBFatalError, "Perfoming rollback - #{e.message}"
        else
          db_update_error = DBUpdateError.new e.message
          insert_queries[current_index][:status] = "failure"
          db_update_error.queries = insert_queries
          db_update_error.perform_rollback = false
          raise db_update_error
        end
      end
    end

    private

    # Runs the configured CREATE TABLE IF NOT EXISTS statement once per
    # target table; a no-op when no create statement was configured.
    def create_tables_if_needed(table_names)
      return if @create_table_query.nil?
      table_names.each do |table_name|
        @mysql_client.query(@create_table_query.gsub('%TABLE_NAME%', table_name))
      end
    end

    # Builds plain insert statements.
    # Returns [queries, table_names]; each query is {:query, :status}.
    def generate_unique_queries(rows, columns)
      table_groups, table_names = create_insert_values_per_partition(rows, columns)
      return build_bulk_queries(table_groups, columns, ""), table_names
    end

    # Builds "insert ... on duplicate key update" statements. Rows of each
    # table are ordered by their key columns (columns - duplicate_fields)
    # before being cut into bulks.
    # Returns [queries, table_names]; each query is {:query, :status}.
    def generate_queries(rows, columns, duplicate_fields, duplicate_fields_actions = nil)
      duplicate_values = create_duplicate_values_statement(duplicate_fields, duplicate_fields_actions)
      table_groups, table_names = create_insert_values_per_partition(rows, columns)
      key_columns = columns - duplicate_fields
      table_groups.each_value do |group|
        group[:values].sort_by! { |value| key_columns.map { |col| value[:original_row][col].to_s } }
      end
      suffix = " on duplicate key update #{duplicate_values.join(',')}"
      return build_bulk_queries(table_groups, columns, suffix), table_names
    end

    # Turns the per-table value groups into query hashes, one per bulk of at
    # most @bulk_size rows. Tables are emitted in ascending partition order;
    # tables sharing a partition keep first-seen order.
    def build_bulk_queries(table_groups, columns, suffix)
      insert_value_header = @use_ids ? "(id," : "("
      header_template = "insert into %TABLE_NAME% #{insert_value_header}#{columns.join(',')}) values \n"
      bulk_size = [@bulk_size.to_i, 1].max
      ordered = table_groups.values.each_with_index.sort_by { |group, position| [group[:partition_index], position] }.map(&:first)
      ordered.flat_map do |group|
        query_header = header_template.gsub('%TABLE_NAME%', group[:table_name])
        group[:values].each_slice(bulk_size).map do |bulk|
          {:query => "#{query_header} #{bulk.map { |v| v[:value] }.join(',')}#{suffix};", :status => "not run"}
        end
      end
    end

    # Builds the assignment list of the "on duplicate key update" clause:
    # every duplicate field is summed, "min"/"max" actions use LEAST/GREATEST,
    # and any other action is read as {"SUM" => col, "COUNT" => col} and
    # produces a sum/count ratio.
    def create_duplicate_values_statement(duplicate_fields, duplicate_fields_actions)
      duplicate_values = duplicate_fields.map { |f| "#{f} = #{f} + values(#{f})" }
      (duplicate_fields_actions || {}).each do |k, v|
        if MYSQL_ACTION_TRANSLATOR.key?(v)
          duplicate_values << "#{k} = #{MYSQL_ACTION_TRANSLATOR[v]}(#{k},values(#{k}))"
        else
          sum = "(#{v['SUM']} + values(#{v['SUM']}))"
          count = "(#{v['COUNT']} + values(#{v['COUNT']}))"
          duplicate_values << "#{k} = #{sum}/#{count}"
        end
      end
      duplicate_values
    end

    # Groups the rows by destination table.
    #
    # Returns [table_groups, table_names]: table_groups maps a table name to
    # {:table_name, :partition_index, :values}, where each value keeps the
    # rendered SQL tuple and the original row; table_names is the Set of
    # partition tables referenced (empty when partitioning is off).
    # Grouping is done per table name, so rows that share a partition id but
    # differ in a prefix value end up in their own table.
    #
    # Raises InputError when a row has no value for the partition field.
    def create_insert_values_per_partition(rows, columns)
      table_groups = {}
      table_names = Set.new
      value_header = @use_ids ? "(NULL," : "("
      rows.each do |row|
        # Every non-nil value is escaped through its string form, so values
        # that are not Strings cannot break out of the quoted literal either.
        values_array = columns.map { |c| row[c].nil? ? "NULL" : "'#{@mysql_client.escape(row[c].to_s)}'" }
        partition_index = 0
        target_table = @table_name
        if @use_partitions
          raise InputError, "Exception in manual partitioning by #{@partition_field} for row" if row[@partition_field].nil?
          partition_index = row[@partition_field].to_i % @partitions_number
          # Two-digit ids keep table names aligned once there are more than 10 partitions.
          partition_id = @partitions_number > 10 ? '%02d' % partition_index : partition_index
          suffix_parts = []
          if @partition_prefixes
            suffix_parts = @partition_prefixes.map { |prefix| row[prefix].to_s.gsub(/\s+/, '_') unless row[prefix].nil? }
          end
          suffix_parts << partition_id
          target_table = "#{@table_name}_#{suffix_parts.join('_')}"
          table_names << target_table
        end
        group = (table_groups[target_table] ||= { :table_name => target_table, :partition_index => partition_index, :values => [] })
        group[:values] << {:value => "#{value_header}#{values_array.join(',')})", :original_row => row}
      end
      return table_groups, table_names
    end

  end
end
@@ -0,0 +1,41 @@
1
+ require 'set'
2
+
3
module ACSummarizationUtils
  # Mixin with publisher revenue calculations driven by the publisher's
  # payment type id and the event country.
  module RevenueCalcUtils

    # Payment type id => symbolic payment type.
    PAYMENT_TYPES = { 1 => :rev_share, 2 => :cpm, 3 => :dynamic, 4 => :geo, 5 => :fixed, 6 => :min_rpc}
    # Countries for which the :geo payment type is excluded from click revenue.
    GEO_COUNTRIES = Set.new ["us"]

    # Returns pub_rev_share * revenue when click revenue applies to the
    # payment type/country combination, otherwise 0.
    def calc_publisher_click_revenue(revenue, pub_rev_share, pub_payment_type, country)
      return 0 unless should_calc_pub_revenue?(pub_payment_type, country)

      pub_rev_share * revenue
    end

    # Returns (pub_cpm_share * imp_count) / 1000 when click revenue does not
    # apply, or when the payment type is :dynamic; otherwise 0.
    def calc_publisher_cpm_revenue(imp_count, pub_cpm_share, pub_payment_type, country)
      click_based = should_calc_pub_revenue?(pub_payment_type, country)
      dynamic = PAYMENT_TYPES[pub_payment_type] == :dynamic
      return 0 if click_based && !dynamic

      (pub_cpm_share * imp_count)/1000
    end

    # Returns cpc when the stripped country is in GEO_COUNTRIES, else 0
    # (also 0 for a nil country).
    def calc_publisher_us_click_revenue(cpc,country)
      return 0 if country.nil?

      GEO_COUNTRIES.include?(country.strip) ? cpc : 0
    end

    # True when the stripped country is in GEO_COUNTRIES; false for nil.
    def is_cpm_geo?(country)
      country.nil? ? false : GEO_COUNTRIES.include?(country.strip)
    end

    # True for :rev_share, :dynamic, :fixed and :min_rpc, and for :geo when
    # the country is not in GEO_COUNTRIES. False for everything else,
    # including unknown payment type ids.
    def should_calc_pub_revenue?(pub_payment_type, country)
      case PAYMENT_TYPES[pub_payment_type]
      when :rev_share, :dynamic, :fixed, :min_rpc
        true
      when :geo
        !is_cpm_geo?(country)
      else
        false
      end
    end

  end
end
@@ -0,0 +1,119 @@
1
+ require_relative 'record'
2
+ require_relative 'lookup_cache'
3
+ require_relative 'errors_handler'
4
+
5
require 'time' # Time.parse lives in the 'time' stdlib extension

module ACSummarizationUtils
  # Mixin with helpers for turning raw messages into records and for
  # grouping/aggregating those records.
  module SummUtils

    # Connects the lookup cache and builds the results DAL from the given
    # option hashes. Returns [lookup_cache, results_dal].
    def initialize_resources(redis_options, lookup_db_options, results_db_options, db_partition_field, db_partition_prefixes=nil)
      lookup_cache = LookupCache.connect(redis_options, lookup_db_options)
      db_options = {
        :host => results_db_options[:host],
        :user => results_db_options[:user],
        :password => results_db_options[:password],
        :db_name => results_db_options[:name],
        :use_transactions => results_db_options[:use_transactions]
      }
      partition_options = {
        :use_partitions => results_db_options[:use_manual_partitions],
        :field_name => db_partition_field,
        :num_partitions => results_db_options[:num_partitions],
        :bulk_size => results_db_options[:bulk_size],
        :create_tables => results_db_options[:create_tables],
        :create_table_columns => results_db_options[:create_table_columns],
        :partition_prefixes => db_partition_prefixes
      }
      results_dal = ResultsDAL.new(db_options, results_db_options[:table_name], partition_options)

      return lookup_cache, results_dal
    end

    # Feeds every message (with downcased keys) to the block and collects the
    # records it returns. Messages for which the block raises
    # LookupCache::InputError or ArgumentError are converted with
    # error_handler.generate_error_message and collected separately.
    #
    # Returns [records, input_error_messages], or nil when messages is nil
    # or empty. The logger argument is currently unused.
    def create_records(logger, error_handler, messages ,&block)
      return nil unless messages && messages.size > 0

      records = []
      input_error_messages = []
      messages.each do |m|
        begin
          records.concat(block.call(downcase_hash(m)))
        rescue LookupCache::InputError, ArgumentError => e
          input_error_messages << error_handler.generate_error_message(m, e)
        end
      end
      return records, input_error_messages
    end

    # Formats a date as "YYYY-MM-DD". Integers are read as epoch seconds,
    # anything else is handed to Time.parse. Returns nil for nil.
    # Raises InputError when the value cannot be converted.
    # NOTE(review): InputError is resolved from the enclosing namespace and is
    # not defined in this module - presumably it comes from one of the
    # required files; confirm.
    def get_date(date)
      return nil if date.nil?
      begin
        # Integer covers what used to be Fixnum, which no longer exists on
        # current Rubies.
        time_object = date.is_a?(Integer) ? Time.at(date) : Time.parse(date)
        return time_object.strftime("%Y-%m-%d")
      rescue StandardError => e
        raise InputError, "Unable to parse time string: #{date} - message: #{e.message}"
      end
    end

    # Returns a new hash with every key downcased; nil for nil input.
    def downcase_hash(input)
      return nil if input.nil?

      input.each_with_object({}) { |(k, v), result| result[k.downcase] = v }
    end

    # Expands records over a list of field values: each given record gets the
    # first value assigned to +field+ (in place), and one clone per remaining
    # value is appended. Returns the input unchanged when any argument is
    # nil/empty.
    def duplicate_records(current_records, field, expanded_field_values)
      return current_records if current_records.nil? or current_records.size == 0 or field.nil? or expanded_field_values.nil? or expanded_field_values.size == 0

      additional_records = []
      current_records.each do |new_record|
        new_record.add_required_field(field, expanded_field_values.first)
        expanded_field_values[1..-1].each do |expanded_value|
          cloned_record = new_record.clone
          cloned_record.add_required_field(field, expanded_value)
          additional_records << cloned_record
        end
      end
      current_records + additional_records
    end

    # Groups records that share the same required_fields and applies every
    # group operation to each group.
    #
    # group_options is an array of this form:
    #   [ {:output_field_name => "", :method => "" , :input_field_name => "" }]
    # example:
    #   [ {:output_field_name => "spend", :method => method(:sum), :input_field_name => "cpc"}]
    #
    # Returns one hash per group holding the required fields plus the
    # aggregated output fields. Each result hash is a copy, so the records'
    # own required_fields are left untouched and the call can be repeated on
    # the same records with the same outcome.
    def summarize_records(records, group_options)
      return [] if records.nil? or records.empty?

      unified_records = {}
      records.each do |record|
        unified_key = record.required_fields.to_a.join(";")
        unified_record = (unified_records[unified_key] ||= record.required_fields.dup)
        group_options.each do |group_operation|
          group_operation[:method].call(unified_record, record, group_operation[:input_field_name], group_operation[:output_field_name])
        end
      end
      unified_records.values
    end

    # Adds the record's input field (as float) to the output field.
    # Raises ArgumentError when the record has no such group field.
    def sum(unified_record, record, input_field_name, output_field_name)
      raise ArgumentError, "Unsupported field name for group function: #{input_field_name}" if record.group_fields[input_field_name].nil?
      unified_record[output_field_name] ||= 0
      unified_record[output_field_name] += record.group_fields[input_field_name].to_f
    end

    # Increments the output field when the record's input field is truthy.
    # Raises ArgumentError when the record has no such group field.
    def count(unified_record, record, input_field_name, output_field_name)
      raise ArgumentError, "Unsupported field name for group function: #{input_field_name}" if record.group_fields[input_field_name].nil?
      unified_record[output_field_name] ||= 0
      unified_record[output_field_name] += 1 if record.group_fields[input_field_name]
    end

    # Keeps the smallest input value (as float) seen so far in the output
    # field. The first value seeds the result, so values of any magnitude
    # are handled.
    # Raises ArgumentError when the record has no such group field.
    def minimum(unified_record, record, input_field_name, output_field_name)
      raise ArgumentError, "Unsupported field name for group function: #{input_field_name}" if record.group_fields[input_field_name].nil?
      value = record.group_fields[input_field_name].to_f
      current = unified_record[output_field_name]
      unified_record[output_field_name] = current.nil? ? value : [current.to_f, value].min
    end

    # Keeps the largest input value (as float) seen so far in the output
    # field. The first value seeds the result, so negative values are
    # handled.
    # Raises ArgumentError when the record has no such group field.
    def maximum(unified_record, record, input_field_name, output_field_name)
      raise ArgumentError, "Unsupported field name for group function: #{input_field_name}" if record.group_fields[input_field_name].nil?
      value = record.group_fields[input_field_name].to_f
      current = unified_record[output_field_name]
      unified_record[output_field_name] = current.nil? ? value : [current.to_f, value].max
    end

    # Maintains "<output>_sum" and "<output>_count" and stores their ratio in
    # the output field (0.0 while the count is 0).
    # Raises ArgumentError when the record has no such group field.
    def average(unified_record, record, input_field_name, output_field_name)
      raise ArgumentError, "Unsupported field name for group function: #{input_field_name}" if record.group_fields[input_field_name].nil?
      # Reserve the output key first so it precedes the helper keys in the hash.
      unified_record[output_field_name] ||= 0
      sum_key = output_field_name + "_sum"
      count_key = output_field_name + "_count"
      sum(unified_record, record, input_field_name, sum_key)
      count(unified_record, record, input_field_name, count_key)
      unified_record[output_field_name] = unified_record[count_key] > 0 ? (unified_record[sum_key] / unified_record[count_key]) : 0.0
    end

  end
end
@@ -0,0 +1,93 @@
1
+
2
module ACSummarizationUtils
  # Mixin with validity predicates for the different message types. Every
  # predicate takes a message hash with downcased string keys and raises
  # ArgumentError when a field it requires is missing.
  module ValidationUtils

    ALLOWED_CLICK_TYPES = [1, 2, 5, 6, 1005]
    REAL_CLICKS = [1, 3, 5, 11, 107, 108, 801, 805]
    INVALID_CLICK_TYPE = -1
    CPC = 1
    CPV = 3

    # Matches the whole value as an optionally negative integer. Anchored with
    # \A / \z so multi-line input cannot pass on the strength of one line.
    AD_ID_PATTERN = /\A-?[0-9]+\z/

    # Shared checks for click messages.
    #
    # Returns [click_type, valid], read from "clicktype"/"ct" and
    # "valid"/"validitytype". Returns [INVALID_CLICK_TYPE, -1] when the click
    # type is blank and "clickurl" is blank or 'NULL'.
    # Raises ArgumentError when the click type, the validity flag, "ad_id",
    # "advertiser_id" or "cpc" is missing, or when "ad_id" is not an integer.
    def common_click_validations(message)
      click_type = message["clicktype"].nil? ? message["ct"] : message["clicktype"]
      click_url = message["clickurl"]
      return [INVALID_CLICK_TYPE, -1] if click_type.to_s.empty? && (click_url.to_s.empty? || click_url == 'NULL')
      raise ArgumentError, "message required field click_type not ct doesn't exist" if click_type.nil?
      valid = message["valid"].nil? ? message["validitytype"] : message["valid"]
      raise ArgumentError, "message required field valid not validitytype doesn't exist" if valid.nil?
      require_fields(message, ["ad_id", "advertiser_id", "cpc"])
      raise ArgumentError, "invalid ad id: #{message["ad_id"]}" unless message["ad_id"].to_s =~ AD_ID_PATTERN
      [click_type, valid]
    end

    # True when the click type is one of REAL_CLICKS (or 1005 on a CPC
    # campaign), ad_id is not -1, advertiser_id is 8 and the click is valid.
    def is_valid_real_click?(message, campaign_type)
      click_type, valid = common_click_validations(message)
      real_click = REAL_CLICKS.include?(click_type.to_i) || (1005 == click_type.to_i && campaign_type.to_i == CPC)
      return false unless real_click

      message["ad_id"].to_i != -1 && message["advertiser_id"].to_i == 8 && valid.to_i == 1
    end

    # True when ad_id is not -1, advertiser_id is 8, the click is valid, cpc
    # is non-negative and the click type is one of ALLOWED_CLICK_TYPES.
    def is_valid_direct_click?(message)
      click_type, valid = common_click_validations(message)
      message["ad_id"].to_i != -1 && message["advertiser_id"].to_i == 8 && valid.to_i == 1 &&
        message["cpc"].to_f >= 0.0 && ALLOWED_CLICK_TYPES.include?(click_type.to_i)
    end

    # True when ad_id is not -1 and advertiserid is 8.
    def is_valid_direct_layer_impr?(message)
      require_fields(message, ["ad_id", "advertiserid"])
      message["ad_id"].to_i != -1 && message["advertiserid"].to_i == 8
    end

    # True when adid is not -1 and conversioneventid is positive
    # (publisherid only has to be present).
    def is_valid_conversion?(message)
      require_fields(message, ["adid", "conversioneventid", "publisherid"])
      message["adid"].to_i != -1 && message["conversioneventid"].to_i > 0
    end

    # True when cid is not -1.
    def is_valid_dynamic_content_event?(message)
      require_fields(message, ["cid"])
      message["cid"].to_i != -1
    end

    # True when cpc is non-negative (ad_id and advertiserid only have to be
    # present).
    def is_valid_layer_impr?(message)
      require_fields(message, ["ad_id", "advertiserid", "cpc"])
      message["cpc"].to_f >= 0.0
    end

    # True when the click is valid, cpc is non-negative and the click type is
    # one of ALLOWED_CLICK_TYPES. Requires "publisher_id" or "p" to be present.
    def is_valid_click?(message)
      click_type, valid = common_click_validations(message)
      publisher_field = message["publisher_id"] || message["p"]
      raise ArgumentError, "message required field 'publisher_id' or 'p' doesn't exist" if publisher_field.nil?

      valid.to_i == 1 && message["cpc"].to_f >= 0.0 && ALLOWED_CLICK_TYPES.include?(click_type.to_i)
    end

    # True when itid is 3 (pid only has to be present).
    def is_valid_impression?(message)
      require_fields(message, ["pid", "itid"])
      message["itid"].to_i == 3
    end

    # True when message["value"]["account_id"] is present and not "".
    def is_valid_inimage_report?(message)
      require_fields(message, ["value"])
      require_fields(message["value"], ["account_id"])
      message["value"]["account_id"] != ""
    end

    private

    # Raises ArgumentError naming the first of field_names whose value in
    # container is nil.
    def require_fields(container, field_names)
      field_names.each do |field_name|
        raise ArgumentError, "message required field '#{field_name}' doesn't exist" if container[field_name].nil?
      end
    end

  end
end