data_drain 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,13 @@ module DataDrain
6
6
  # Contenedor para todas las opciones de configuración del motor DataDrain.
7
7
  class Configuration
8
8
  attr_accessor :storage_mode, :aws_region,
9
- :aws_access_key_id, :aws_secret_access_key,
10
- :db_host, :db_port, :db_user, :db_pass, :db_name,
11
- :batch_size, :throttle_delay, :logger, :limit_ram, :tmp_directory,
12
- :idle_in_transaction_session_timeout
9
+ :aws_access_key_id, :aws_secret_access_key,
10
+ :db_host, :db_port, :db_user, :db_pass, :db_name,
11
+ :batch_size, :throttle_delay, :logger, :limit_ram, :tmp_directory,
12
+ :idle_in_transaction_session_timeout,
13
+ :vacuum_after_purge,
14
+ :slow_batch_threshold_s,
15
+ :slow_batch_alert_after
13
16
 
14
17
  def initialize
15
18
  @storage_mode = :local
@@ -20,12 +23,59 @@ module DataDrain
20
23
  @limit_ram = nil # eg 2GB
21
24
  @tmp_directory = nil # eg /tmp/duckdb_work
22
25
  @idle_in_transaction_session_timeout = 0
23
- @logger = Logger.new($stdout)
26
+ @vacuum_after_purge = false
27
+ @slow_batch_threshold_s = 30
28
+ @slow_batch_alert_after = 5
29
+ @logger = Logger.new($stdout)
24
30
  end
25
31
 
26
32
  # @return [String] Cadena de conexión optimizada para DuckDB.
27
33
  def duckdb_connection_string
28
34
  "postgresql://#{@db_user}:#{@db_pass}@#{@db_host}:#{@db_port}/#{@db_name}?options=-c%20idle_in_transaction_session_timeout%3D#{@idle_in_transaction_session_timeout}"
29
35
  end
36
+
37
+ # Valida invariantes generales (storage_mode + AWS si aplica).
38
+ # Llamado por FileIngestor#initialize y GlueRunner.run_and_wait.
39
+ #
40
+ # @raise [DataDrain::ConfigurationError]
41
+ def validate!
42
+ validate_storage_mode!
43
+ validate_aws_config! if storage_mode.to_sym == :s3
44
+ end
45
+
46
+ # Valida además las credenciales PostgreSQL.
47
+ # Llamado por Engine#initialize.
48
+ #
49
+ # @raise [DataDrain::ConfigurationError]
50
+ def validate_for_engine!
51
+ validate!
52
+ validate_db_config!
53
+ end
54
+
55
+ private
56
+
57
+ def validate_storage_mode!
58
+ return if %i[local s3].include?(storage_mode.to_sym)
59
+
60
+ raise DataDrain::ConfigurationError,
61
+ "storage_mode debe ser :local o :s3, recibido #{storage_mode.inspect}"
62
+ end
63
+
64
+ def validate_aws_config!
65
+ return unless aws_region.nil? || aws_region.to_s.empty?
66
+
67
+ raise DataDrain::ConfigurationError,
68
+ "aws_region es obligatorio con storage_mode = :s3"
69
+ end
70
+
71
+ def validate_db_config!
72
+ %i[db_host db_user db_name].each do |attr|
73
+ val = public_send(attr)
74
+ next unless val.nil? || val.to_s.empty?
75
+
76
+ raise DataDrain::ConfigurationError,
77
+ "config.#{attr} es obligatorio para Engine (storage_mode=#{storage_mode})"
78
+ end
79
+ end
30
80
  end
31
81
  end
@@ -5,12 +5,12 @@ require "pg"
5
5
 
6
6
  module DataDrain
7
7
  # Motor principal de extracción y purga de datos (DataDrain).
8
- # rubocop:disable Metrics/ClassLength, Metrics/AbcSize, Metrics/MethodLength, Naming/AccessorMethodName
9
8
  #
10
9
  # Orquesta el flujo ETL desde PostgreSQL hacia un Data Lake analítico
11
10
  # delegando la interacción del almacenamiento al adaptador configurado.
12
11
  class Engine
13
12
  include Observability
13
+ include Observability::Timing
14
14
  # Inicializa una nueva instancia del motor de extracción.
15
15
  #
16
16
  # @param options [Hash] Diccionario de configuración para la extracción.
@@ -42,6 +42,7 @@ module DataDrain
42
42
  @skip_export = options.fetch(:skip_export, false)
43
43
 
44
44
  @config = DataDrain.configuration
45
+ @config.validate_for_engine!
45
46
  @logger = @config.logger
46
47
  @adapter = DataDrain::Storage.adapter
47
48
 
@@ -49,70 +50,91 @@ module DataDrain
49
50
  @duckdb = database.connect
50
51
  end
51
52
 
52
- # Ejecuta el flujo completo del motor: Setup, Conteo, Exportación (opcional), Verificación y Purga.
53
- #
54
- # @return [Boolean] `true` si el proceso finalizó con éxito, `false` si falló la integridad.
55
53
  def call
56
- start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
57
- safe_log(:info, "engine.start",
58
- { table: @table_name, start_date: @start_date.to_date, end_date: @end_date.to_date })
54
+ @durations = {}
55
+ start_time = monotonic
56
+ log_start
59
57
 
60
58
  setup_duckdb
59
+ return skip_empty(start_time) if step_count.zero?
61
60
 
62
- # 1. Conteo inicial en Postgres
63
- step_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
64
- @pg_count = get_postgres_count
65
- db_query_duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - step_start
66
-
67
- if @pg_count.zero?
68
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
69
- safe_log(:info, "engine.skip_empty",
70
- { table: @table_name, duration_s: duration.round(2), db_query_duration_s: db_query_duration.round(2) })
71
- return true
72
- end
73
-
74
- # 2. Exportación
75
- export_duration = 0.0
76
61
  if @skip_export
77
62
  safe_log(:info, "engine.skip_export", { table: @table_name })
78
63
  else
79
- safe_log(:info, "engine.export_start", { table: @table_name, count: @pg_count })
80
- step_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
81
- export_to_parquet
82
- export_duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - step_start
64
+ step_export
83
65
  end
66
+ return integrity_failed(start_time) unless step_verify
84
67
 
85
- # 3. Verificación de Integridad
86
- step_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
87
- integrity_ok = verify_integrity
88
- integrity_duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - step_start
68
+ step_purge
69
+ log_complete(start_time)
70
+ true
71
+ end
89
72
 
90
- if integrity_ok
91
- # 4. Purga en Postgres
92
- step_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
93
- purge_from_postgres
94
- purge_duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - step_start
73
+ private
95
74
 
96
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
97
- safe_log(:info, "engine.complete", {
98
- table: @table_name,
99
- duration_s: duration.round(2),
100
- db_query_duration_s: db_query_duration.round(2),
101
- export_duration_s: export_duration.round(2),
102
- integrity_duration_s: integrity_duration.round(2),
103
- purge_duration_s: purge_duration.round(2),
104
- count: @pg_count
105
- })
106
- true
107
- else
108
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
109
- safe_log(:error, "engine.integrity_error",
110
- { table: @table_name, duration_s: duration.round(2), count: @pg_count })
111
- false
112
- end
75
+ # @api private
76
+ def log_start
77
+ safe_log(:info, "engine.start",
78
+ { table: @table_name, start_date: @start_date.to_date, end_date: @end_date.to_date })
113
79
  end
114
80
 
115
- private
81
+ # @api private
82
+ def step_count
83
+ @pg_count = timed(:db_query) { get_postgres_count }
84
+ @pg_count
85
+ end
86
+
87
+ # @api private
88
+ def skip_empty(start_time)
89
+ duration = monotonic - start_time
90
+ safe_log(:info, "engine.skip_empty", {
91
+ table: @table_name,
92
+ duration_s: duration.round(2),
93
+ db_query_duration_s: @durations.fetch(:db_query, 0).round(2)
94
+ })
95
+ true
96
+ end
97
+
98
+ # @api private
99
+ def step_export
100
+ safe_log(:info, "engine.export_start", { table: @table_name, count: @pg_count })
101
+ timed(:export) { export_to_parquet }
102
+ end
103
+
104
+ # @api private
105
+ def step_verify
106
+ timed(:integrity) { verify_integrity }
107
+ end
108
+
109
+ # @api private
110
+ def step_purge
111
+ timed(:purge) { purge_from_postgres }
112
+ end
113
+
114
+ # @api private
115
+ def log_complete(start_time)
116
+ duration = monotonic - start_time
117
+ safe_log(:info, "engine.complete", {
118
+ table: @table_name,
119
+ duration_s: duration.round(2),
120
+ db_query_duration_s: @durations.fetch(:db_query, 0).round(2),
121
+ export_duration_s: @durations.fetch(:export, 0).round(2),
122
+ integrity_duration_s: @durations.fetch(:integrity, 0).round(2),
123
+ purge_duration_s: @durations.fetch(:purge, 0).round(2),
124
+ count: @pg_count
125
+ })
126
+ end
127
+
128
+ # @api private
129
+ def integrity_failed(start_time)
130
+ duration = monotonic - start_time
131
+ safe_log(:error, "engine.integrity_error", {
132
+ table: @table_name,
133
+ duration_s: duration.round(2),
134
+ count: @pg_count
135
+ })
136
+ false
137
+ end
116
138
 
117
139
  # @api private
118
140
  # @return [String]
@@ -212,40 +234,129 @@ module DataDrain
212
234
  conn.exec("SET idle_in_transaction_session_timeout = #{@config.idle_in_transaction_session_timeout};")
213
235
  end
214
236
 
237
+ total_deleted = purge_loop(conn)
238
+
239
+ vacuum_if_needed(conn, total_deleted)
240
+ ensure
241
+ conn&.close
242
+ end
243
+
244
+ # @api private
245
+ def vacuum_if_needed(conn, total_deleted)
246
+ return unless @config.vacuum_after_purge
247
+ return if total_deleted.zero?
248
+
249
+ vacuum_start = monotonic
250
+ dead_before = fetch_dead_tuple_count(conn)
251
+
252
+ begin
253
+ conn.exec("VACUUM ANALYZE #{@table_name};")
254
+ rescue PG::Error => e
255
+ safe_log(:warn, "engine.vacuum_error", {
256
+ table: @table_name,
257
+ dead_tuples_before: dead_before,
258
+ rows_deleted_count: total_deleted,
259
+ duration_s: (monotonic - vacuum_start).round(2)
260
+ }.merge(exception_metadata(e)))
261
+ return
262
+ end
263
+
264
+ dead_after = fetch_dead_tuple_count(conn)
265
+ vacuum_duration = monotonic - vacuum_start
266
+
267
+ safe_log(:info, "engine.vacuum_complete", {
268
+ table: @table_name,
269
+ duration_s: vacuum_duration.round(2),
270
+ dead_tuples_before: dead_before,
271
+ dead_tuples_after: dead_after,
272
+ rows_deleted_count: total_deleted
273
+ })
274
+ end
275
+
276
+ # @api private
277
+ def fetch_dead_tuple_count(conn)
278
+ result = conn.exec_params(
279
+ "SELECT n_dead_tup FROM pg_stat_user_tables WHERE relname = $1",
280
+ [@table_name]
281
+ )
282
+ result.first&.dig("n_dead_tup")&.to_i || 0
283
+ rescue PG::Error
284
+ -1
285
+ end
286
+
287
+ # @api private
288
+ # @param conn [PG::Connection]
289
+ # @return [Integer] total de filas borradas
290
+ def purge_loop(conn)
215
291
  batches_processed = 0
216
292
  total_deleted = 0
293
+ slow_batch_streak = 0
217
294
 
218
295
  loop do
219
- sql = <<~SQL
220
- DELETE FROM #{@table_name}
221
- WHERE #{@primary_key} IN (
222
- SELECT #{@primary_key} FROM #{@table_name}
223
- WHERE #{base_where_sql}
224
- LIMIT #{@config.batch_size}
225
- )
226
- SQL
227
-
228
- result = conn.exec(sql)
296
+ batch_start = monotonic
297
+ result = conn.exec(build_delete_sql)
298
+ batch_duration = monotonic - batch_start
229
299
  count = result.cmd_tuples
230
300
  break if count.zero?
231
301
 
232
302
  batches_processed += 1
233
303
  total_deleted += count
234
304
 
235
- # Heartbeat cada 100 lotes para monitorear procesos largos de 1TB
236
- if (batches_processed % 100).zero?
237
- safe_log(:info, "engine.purge_heartbeat", {
305
+ slow_batch_streak = handle_batch_timing(batch_duration, count, slow_batch_streak)
306
+ emit_heartbeat_if_due(batches_processed, total_deleted)
307
+
308
+ sleep(@config.throttle_delay) if @config.throttle_delay.positive?
309
+ end
310
+
311
+ total_deleted
312
+ end
313
+
314
+ # @api private
315
+ def handle_batch_timing(batch_duration, count, streak)
316
+ if batch_duration > @config.slow_batch_threshold_s
317
+ streak += 1
318
+ safe_log(:warn, "engine.slow_batch", {
319
+ table: @table_name,
320
+ batch_duration_s: batch_duration.round(2),
321
+ batch_size: count,
322
+ streak: streak,
323
+ threshold_s: @config.slow_batch_threshold_s
324
+ })
325
+
326
+ if streak == @config.slow_batch_alert_after
327
+ safe_log(:warn, "engine.purge_degraded", {
238
328
  table: @table_name,
239
- batches_processed_count: batches_processed,
240
- rows_deleted_count: total_deleted
329
+ consecutive_slow_batches: streak,
330
+ hint: "considerar índice composite o particionamiento (ver postgres-tuning.md)"
241
331
  })
242
332
  end
243
-
244
- sleep(@config.throttle_delay) if @config.throttle_delay.positive?
333
+ streak
334
+ else
335
+ 0
245
336
  end
246
- ensure
247
- conn&.close
337
+ end
338
+
339
+ # @api private
340
+ def emit_heartbeat_if_due(batches_processed, total_deleted)
341
+ return unless (batches_processed % 100).zero?
342
+
343
+ safe_log(:info, "engine.purge_heartbeat", {
344
+ table: @table_name,
345
+ batches_processed_count: batches_processed,
346
+ rows_deleted_count: total_deleted
347
+ })
348
+ end
349
+
350
+ # @api private
351
+ def build_delete_sql
352
+ <<~SQL
353
+ DELETE FROM #{@table_name}
354
+ WHERE #{@primary_key} IN (
355
+ SELECT #{@primary_key} FROM #{@table_name}
356
+ WHERE #{base_where_sql}
357
+ LIMIT #{@config.batch_size}
358
+ )
359
+ SQL
248
360
  end
249
361
  end
250
- # rubocop:enable Metrics/ClassLength, Metrics/AbcSize, Metrics/MethodLength, Naming/AccessorMethodName
251
362
  end
@@ -6,8 +6,7 @@ module DataDrain
6
6
  # aplicando compresión ZSTD y particionamiento Hive.
7
7
  class FileIngestor
8
8
  include Observability
9
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity,
10
- # Metrics/MethodLength
9
+ include Observability::Timing
11
10
 
12
11
  # @param options [Hash] Opciones de ingestión.
13
12
  # @option options [String] :source_path Ruta absoluta al archivo local.
@@ -25,6 +24,7 @@ module DataDrain
25
24
  @bucket = options[:bucket]
26
25
 
27
26
  @config = DataDrain.configuration
27
+ @config.validate!
28
28
  @logger = @config.logger
29
29
  @adapter = DataDrain::Storage.adapter
30
30
 
@@ -35,46 +35,77 @@ module DataDrain
35
35
  # Ejecuta el flujo de ingestión.
36
36
  # @return [Boolean] true si el proceso fue exitoso.
37
37
  def call
38
- start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
38
+ @durations = {}
39
+ start_time = monotonic
39
40
  safe_log(:info, "file_ingestor.start", { source_path: @source_path })
40
41
 
41
- unless File.exist?(@source_path)
42
- safe_log(:error, "file_ingestor.file_not_found", { source_path: @source_path })
43
- return false
44
- end
42
+ return file_not_found(start_time) unless step_validate_file
43
+
44
+ step_setup_duckdb
45
+ @reader_function = determine_reader
46
+ @source_count = step_count_source
47
+
48
+ return skip_empty(start_time) if @source_count.zero?
49
+
50
+ step_export
51
+ log_complete(start_time)
52
+ cleanup_local_file
53
+ true
54
+ rescue DuckDB::Error => e
55
+ duration = monotonic - start_time
56
+ safe_log(:error, "file_ingestor.duckdb_error",
57
+ { source_path: @source_path }.merge(exception_metadata(e)).merge(duration_s: duration.round(2)))
58
+ false
59
+ ensure
60
+ @duckdb&.close
61
+ end
62
+
63
+ private
64
+
65
+ # @api private
66
+ def file_not_found(_start_time)
67
+ safe_log(:error, "file_ingestor.file_not_found", { source_path: @source_path })
68
+ false
69
+ end
70
+
71
+ # @api private
72
+ def step_validate_file
73
+ File.exist?(@source_path)
74
+ end
45
75
 
76
+ # @api private
77
+ def step_setup_duckdb
46
78
  @duckdb.query("SET max_memory='#{@config.limit_ram}';") if @config.limit_ram.present?
47
79
  @duckdb.query("SET temp_directory='#{@config.tmp_directory}'") if @config.tmp_directory.present?
48
-
49
80
  @adapter.setup_duckdb(@duckdb)
81
+ end
50
82
 
51
- # Determinamos la función lectora de DuckDB según la extensión del archivo
52
- reader_function = determine_reader
53
-
54
- # 1. Conteo de seguridad
55
- step_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
56
- source_count = @duckdb.query("SELECT COUNT(*) FROM #{reader_function}").first.first
57
- source_query_duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - step_start
83
+ # @api private
84
+ def step_count_source
85
+ source_count = timed(:source_query) { @duckdb.query("SELECT COUNT(*) FROM #{@reader_function}").first.first }
58
86
  safe_log(:info, "file_ingestor.count", {
59
87
  source_path: @source_path,
60
88
  count: source_count,
61
- source_query_duration_s: source_query_duration.round(2)
89
+ source_query_duration_s: @durations.fetch(:source_query, 0).round(2)
62
90
  })
91
+ source_count
92
+ end
63
93
 
64
- if source_count.zero?
65
- cleanup_local_file
66
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
67
- safe_log(:info, "file_ingestor.skip_empty", { source_path: @source_path, duration_s: duration.round(2) })
68
- return true
69
- end
94
+ # @api private
95
+ def skip_empty(start_time)
96
+ cleanup_local_file
97
+ duration = monotonic - start_time
98
+ safe_log(:info, "file_ingestor.skip_empty", { source_path: @source_path, duration_s: duration.round(2) })
99
+ true
100
+ end
70
101
 
71
- # 2. Exportación / Subida
102
+ # @api private
103
+ def step_export
72
104
  @adapter.prepare_export_path(@bucket, @folder_name)
73
105
  dest_path = if @config.storage_mode.to_sym == :s3
74
106
  "s3://#{@bucket}/#{@folder_name}/"
75
107
  else
76
- File.join(@bucket,
77
- @folder_name, "")
108
+ File.join(@bucket, @folder_name, "")
78
109
  end
79
110
 
80
111
  partition_clause = @partition_keys.any? ? "PARTITION_BY (#{@partition_keys.join(", ")})," : ""
@@ -82,7 +113,7 @@ module DataDrain
82
113
  query = <<~SQL
83
114
  COPY (
84
115
  SELECT #{@select_sql}
85
- FROM #{reader_function}
116
+ FROM #{@reader_function}
86
117
  ) TO '#{dest_path}'
87
118
  (
88
119
  FORMAT PARQUET,
@@ -93,32 +124,21 @@ module DataDrain
93
124
  SQL
94
125
 
95
126
  safe_log(:info, "file_ingestor.export_start", { dest_path: dest_path })
96
- step_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
97
- @duckdb.query(query)
98
- export_duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - step_start
127
+ timed(:export) { @duckdb.query(query) }
128
+ end
99
129
 
100
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
130
+ # @api private
131
+ def log_complete(start_time)
132
+ duration = monotonic - start_time
101
133
  safe_log(:info, "file_ingestor.complete", {
102
134
  source_path: @source_path,
103
135
  duration_s: duration.round(2),
104
- source_query_duration_s: source_query_duration.round(2),
105
- export_duration_s: export_duration.round(2),
106
- count: source_count
136
+ source_query_duration_s: @durations.fetch(:source_query, 0).round(2),
137
+ export_duration_s: @durations.fetch(:export, 0).round(2),
138
+ count: @source_count
107
139
  })
108
-
109
- cleanup_local_file
110
- true
111
- rescue DuckDB::Error => e
112
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
113
- safe_log(:error, "file_ingestor.duckdb_error",
114
- { source_path: @source_path }.merge(exception_metadata(e)).merge(duration_s: duration.round(2)))
115
- false
116
- ensure
117
- @duckdb&.close
118
140
  end
119
141
 
120
- private
121
-
122
142
  # @api private
123
143
  def determine_reader
124
144
  case File.extname(@source_path).downcase
@@ -141,6 +161,4 @@ module DataDrain
141
161
  safe_log(:info, "file_ingestor.cleanup", { source_path: @source_path })
142
162
  end
143
163
  end
144
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity,
145
- # Metrics/MethodLength
146
164
  end
@@ -14,22 +14,35 @@ module DataDrain
14
14
  # @param job_name [String] Nombre del Job en la consola de AWS.
15
15
  # @param arguments [Hash] Argumentos de ejecución (deben empezar con --).
16
16
  # @param polling_interval [Integer] Segundos de espera entre cada chequeo de estado.
17
+ # @param max_wait_seconds [Integer, nil] Timeout máximo en segundos.
18
+ # nil = sin límite (comportamiento anterior).
17
19
  # @return [Boolean] true si el Job terminó exitosamente (SUCCEEDED).
18
- # @raise [RuntimeError] Si el Job falla o se detiene.
19
- def self.run_and_wait(job_name, arguments = {}, polling_interval: 30)
20
+ # @raise [DataDrain::Error] si max_wait_seconds excede antes de SUCCEEDED.
21
+ # @raise [RuntimeError] si el Job falla o se detiene.
22
+ def self.run_and_wait(job_name, arguments = {}, polling_interval: 30, max_wait_seconds: nil)
20
23
  config = DataDrain.configuration
24
+ config.validate!
21
25
  client = Aws::Glue::Client.new(region: config.aws_region)
22
26
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
23
27
 
24
- # Usamos el logger de la configuración directamente para el primer log antes de instanciar safe_log si fuera necesario
25
- # Pero como extendemos Observability, usamos safe_log directamente.
26
- @logger = config.logger
28
+ @logger = config.logger
27
29
 
28
30
  safe_log(:info, "glue_runner.start", { job: job_name })
29
31
  resp = client.start_job_run(job_name: job_name, arguments: arguments)
30
32
  run_id = resp.job_run_id
31
33
 
32
34
  loop do
35
+ if max_wait_seconds &&
36
+ (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) > max_wait_seconds
37
+ safe_log(:error, "glue_runner.timeout", {
38
+ job: job_name,
39
+ run_id: run_id,
40
+ max_wait_seconds: max_wait_seconds
41
+ })
42
+ raise DataDrain::Error,
43
+ "Glue Job #{job_name} (Run ID: #{run_id}) excedió max_wait_seconds=#{max_wait_seconds}"
44
+ end
45
+
33
46
  run_info = client.get_job_run(job_name: job_name, run_id: run_id).job_run
34
47
  status = run_info.job_run_state
35
48
 
@@ -41,15 +54,14 @@ module DataDrain
41
54
  when "FAILED", "STOPPED", "TIMEOUT"
42
55
  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
43
56
  error_metadata = { job: job_name, run_id: run_id, status: status, duration_s: duration.round(2) }
44
-
45
- if run_info.error_message
46
- error_metadata[:error_message] = run_info.error_message.gsub("\"", "'")[0, 200]
47
- end
57
+
58
+ error_metadata[:error_message] = run_info.error_message.gsub("\"", "'")[0, 200] if run_info.error_message
48
59
 
49
60
  safe_log(:error, "glue_runner.failed", error_metadata)
50
61
  raise "Glue Job #{job_name} (Run ID: #{run_id}) falló con estado #{status}."
51
62
  else
52
- safe_log(:info, "glue_runner.polling", { job: job_name, run_id: run_id, status: status, next_check_in_s: polling_interval })
63
+ safe_log(:info, "glue_runner.polling",
64
+ { job: job_name, run_id: run_id, status: status, next_check_in_s: polling_interval })
53
65
  sleep polling_interval
54
66
  end
55
67
  end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module DataDrain
  module Observability
    # Helpers for measuring how long an operation takes.
    # @api private
    module Timing
      private

      # Current reading of the monotonic clock, in seconds.
      #
      # @return [Float]
      def monotonic
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end

      # Runs the block and stores its elapsed time in @durations[step_name].
      #
      # The duration is recorded in an ensure clause, so a step that raises
      # still leaves its timing behind before the exception propagates.
      #
      # @param step_name [Symbol] key under which the duration is stored
      # @yield the operation to measure
      # @return whatever the block returns
      def timed(step_name)
        started_at = monotonic
        yield
      ensure
        (@durations ||= {})[step_name] = monotonic - started_at
      end
    end
  end
end
@@ -1,12 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DataDrain
4
- # Módulo interno para garantizar que la telemetría cumpla con los
4
+ # Módulo interno para garantizar que la telemetría cumpla con los
5
5
  # Global-Observability-Standards: resiliencia, KV-structured y precisión.
6
6
  #
7
7
  # Este módulo es genérico y puede ser utilizado en otras gemas.
8
8
  # @api private
9
9
  module Observability
10
+ SENSITIVE_KEY_PATTERN = /password|passwd|pass|secret|token|api_key|apikey|auth|credential|private_key/i
11
+
10
12
  private
11
13
 
12
14
  # Emite un log estructurado de forma segura.
@@ -19,7 +21,7 @@ module DataDrain
19
21
 
20
22
  # Enmascaramiento preventivo de secretos (Security)
21
23
  log_line = fields.map do |k, v|
22
- val = %i[password token secret api_key auth].include?(k.to_sym) ? "[FILTERED]" : v
24
+ val = SENSITIVE_KEY_PATTERN.match?(k.to_s) ? "[FILTERED]" : v
23
25
  "#{k}=#{val}"
24
26
  end.join(" ")
25
27