openc3 7.0.1 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/bin/openc3cli +47 -3
  3. data/data/config/item_modifiers.yaml +1 -1
  4. data/data/config/microservice.yaml +12 -1
  5. data/data/config/parameter_modifiers.yaml +49 -7
  6. data/data/config/target.yaml +11 -0
  7. data/data/config/target_config.yaml +6 -2
  8. data/lib/openc3/api/cmd_api.rb +2 -1
  9. data/lib/openc3/api/metrics_api.rb +11 -1
  10. data/lib/openc3/api/tlm_api.rb +21 -6
  11. data/lib/openc3/core_ext/faraday.rb +1 -1
  12. data/lib/openc3/io/json_api.rb +1 -1
  13. data/lib/openc3/logs/log_writer.rb +3 -1
  14. data/lib/openc3/microservices/decom_common.rb +128 -0
  15. data/lib/openc3/microservices/decom_microservice.rb +26 -95
  16. data/lib/openc3/microservices/interface_decom_common.rb +6 -2
  17. data/lib/openc3/microservices/interface_microservice.rb +10 -8
  18. data/lib/openc3/microservices/log_microservice.rb +1 -1
  19. data/lib/openc3/microservices/microservice.rb +3 -2
  20. data/lib/openc3/microservices/queue_microservice.rb +1 -1
  21. data/lib/openc3/microservices/scope_cleanup_microservice.rb +60 -46
  22. data/lib/openc3/microservices/text_log_microservice.rb +1 -2
  23. data/lib/openc3/models/cvt_model.rb +24 -13
  24. data/lib/openc3/models/db_sharded_model.rb +110 -0
  25. data/lib/openc3/models/interface_model.rb +9 -0
  26. data/lib/openc3/models/interface_status_model.rb +33 -3
  27. data/lib/openc3/models/metric_model.rb +96 -37
  28. data/lib/openc3/models/microservice_model.rb +7 -0
  29. data/lib/openc3/models/microservice_status_model.rb +30 -3
  30. data/lib/openc3/models/reingest_job_model.rb +153 -0
  31. data/lib/openc3/models/scope_model.rb +3 -2
  32. data/lib/openc3/models/script_status_model.rb +4 -20
  33. data/lib/openc3/models/target_model.rb +113 -100
  34. data/lib/openc3/packets/packet_config.rb +4 -1
  35. data/lib/openc3/script/script.rb +2 -2
  36. data/lib/openc3/script/script_runner.rb +4 -4
  37. data/lib/openc3/script/telemetry.rb +3 -3
  38. data/lib/openc3/script/web_socket_api.rb +29 -22
  39. data/lib/openc3/system/system.rb +20 -3
  40. data/lib/openc3/topics/command_decom_topic.rb +4 -2
  41. data/lib/openc3/topics/command_topic.rb +8 -5
  42. data/lib/openc3/topics/decom_interface_topic.rb +15 -10
  43. data/lib/openc3/topics/interface_topic.rb +71 -29
  44. data/lib/openc3/topics/limits_event_topic.rb +62 -41
  45. data/lib/openc3/topics/router_topic.rb +61 -21
  46. data/lib/openc3/topics/system_events_topic.rb +18 -1
  47. data/lib/openc3/topics/telemetry_decom_topic.rb +2 -1
  48. data/lib/openc3/topics/telemetry_topic.rb +4 -2
  49. data/lib/openc3/topics/topic.rb +77 -5
  50. data/lib/openc3/utilities/aws_bucket.rb +2 -0
  51. data/lib/openc3/utilities/cli_generator.rb +3 -2
  52. data/lib/openc3/utilities/metric.rb +15 -1
  53. data/lib/openc3/utilities/questdb_client.rb +173 -37
  54. data/lib/openc3/utilities/reingest_job.rb +377 -0
  55. data/lib/openc3/utilities/ruby_lex_utils.rb +2 -0
  56. data/lib/openc3/utilities/store_autoload.rb +78 -52
  57. data/lib/openc3/utilities/store_queued.rb +20 -12
  58. data/lib/openc3/version.rb +6 -6
  59. data/templates/plugin/plugin.gemspec +13 -1
  60. data/templates/tool_angular/package.json +2 -2
  61. data/templates/tool_react/package.json +1 -1
  62. data/templates/tool_svelte/package.json +1 -1
  63. data/templates/tool_vue/package.json +3 -3
  64. data/templates/tool_vue/src/router.js +2 -2
  65. data/templates/widget/package.json +2 -2
  66. metadata +7 -3
@@ -28,42 +28,104 @@ module OpenC3
28
28
  class QuestDBError < StandardError; end
29
29
 
30
30
  # Thread-local PG connection storage using Concurrent::ThreadLocalVar.
31
- # Each thread gets its own connection to avoid thread-safety issues with PG::Connection.
31
+ # Each thread gets its own connections (per db_shard) to avoid thread-safety issues with PG::Connection.
32
32
  # Connections are automatically garbage collected when threads terminate.
33
- @thread_conn = Concurrent::ThreadLocalVar.new(nil)
33
+ # Value is a Hash: { db_shard_number => PG::Connection }
34
+ @thread_conns = Concurrent::ThreadLocalVar.new { Hash.new } # NOSONAR
35
+
36
+ # DB_Shard cache: { "scope__target_name" => [db_shard_number, Time] }
37
+ @db_shard_cache = {}
38
+ @db_shard_cache_mutex = Mutex.new
39
+ DB_SHARD_CACHE_TIMEOUT = 60 # seconds
40
+
41
+ # Resolve the hostname for a given db_shard number.
42
+ # If OPENC3_TSDB_HOSTNAME contains "SHARDNUM", it is replaced with the db_shard number.
43
+ # Otherwise, all db_shards connect to the same host (backward compatible).
44
# @param db_shard [Integer, #to_s] db_shard number to substitute into the host template
# @return [String] OPENC3_TSDB_HOSTNAME with every "SHARDNUM" replaced by the
#   db_shard number; an empty string when the variable is unset
def self.hostname_for_db_shard(db_shard)
  host_template = ENV['OPENC3_TSDB_HOSTNAME'].to_s
  shard_text = db_shard.to_s
  host_template.gsub("SHARDNUM", shard_text)
end
47
+
48
+ # Look up the db_shard number for a target from TargetModel with a 1-minute cache.
49
+ # Non-target-specific data (nil target_name) always returns db_shard 0.
50
# Successful lookups are cached per "scope__target_name" for
# DB_SHARD_CACHE_TIMEOUT seconds. A failed lookup is never cached: the last
# known db_shard (if any) is returned instead, falling back to 0, so a
# transient TargetModel error cannot pin a target to db_shard 0 for the whole
# cache window.
#
# @param target_name [String, nil] Target to resolve; nil means non-target data
# @param scope [String] Scope the target belongs to
# @return [Integer] db_shard number (0 when unknown)
def self.db_shard_for_target(target_name, scope: "DEFAULT")
  return 0 unless target_name

  cache_key = "#{scope}__#{target_name}"
  now = Time.now
  last_known = nil

  @db_shard_cache_mutex.synchronize do
    cached_shard, cached_at = @db_shard_cache[cache_key]
    if cached_at
      return cached_shard if (now - cached_at) < DB_SHARD_CACHE_TIMEOUT
      # Expired, but still the best answer available if the refresh fails
      last_known = cached_shard
    end
  end

  # Cache miss or expired — look up from TargetModel (outside the lock so a
  # slow lookup does not block other threads)
  begin
    model = TargetModel.get(name: target_name, scope: scope)
    db_shard = model ? model['db_shard'].to_i : 0
  rescue StandardError
    # Do not cache the failure; retry on the next call
    return last_known || 0
  end

  @db_shard_cache_mutex.synchronize do
    @db_shard_cache[cache_key] = [db_shard, now]
  end

  db_shard
end
34
78
 
35
- # Get or create a thread-local PG connection with type mapping configured.
79
+ # Get or create a thread-local PG connection for the given db_shard with type mapping configured.
36
80
  # Returns the thread-local connection - callers should not close it.
37
- def self.connection
38
- conn = @thread_conn.value
39
- if conn.nil? || conn.finished?
40
- conn = PG::Connection.new(
41
- host: ENV['OPENC3_TSDB_HOSTNAME'],
42
- port: ENV['OPENC3_TSDB_QUERY_PORT'],
43
- user: ENV['OPENC3_TSDB_USERNAME'],
44
- password: ENV['OPENC3_TSDB_PASSWORD'],
45
- dbname: 'qdb'
46
- )
47
- conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn)
48
- @thread_conn.value = conn
81
# @param db_shard [Integer] db_shard whose QuestDB instance to connect to
# @return [PG::Connection] Live connection owned by the calling thread
def self.connection(db_shard: 0)
  conns = @thread_conns.value
  conn = conns[db_shard]
  if conn && !conn.finished?
    begin
      conn.check_socket
      return conn
    rescue StandardError
      # The socket is dead. Release the old handle before replacing it so the
      # descriptor is not held until garbage collection.
      begin
        conn.finish
      rescue StandardError
        nil # Best effort: the connection is being discarded either way
      end
    end
  end
  conn = PG::Connection.new(
    host: hostname_for_db_shard(db_shard),
    port: ENV['OPENC3_TSDB_QUERY_PORT'],
    user: ENV['OPENC3_TSDB_USERNAME'],
    password: ENV['OPENC3_TSDB_PASSWORD'],
    dbname: 'qdb'
  )
  conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn)
  conns[db_shard] = conn
  @thread_conns.value = conns
  conn
end
52
104
 
53
- # Reset the connection for the current thread. Used after errors.
54
- def self.disconnect
55
- conn = @thread_conn.value
56
- if conn && !conn.finished?
57
- conn.finish
105
+ # Reset the connection(s) for the current thread. Used after errors.
106
+ # If db_shard is nil, closes all db_shard connections. Otherwise closes only the specified db_shard.
107
# @param db_shard [Integer, nil] db_shard to close, or nil to close every
#   connection held by the calling thread
def self.disconnect(db_shard: nil)
  held = @thread_conns.value
  shutdown = ->(handle) { handle.finish if handle && !handle.finished? }

  unless db_shard.nil?
    # Targeted reset: close just this db_shard and forget it
    shutdown.call(held[db_shard])
    held.delete(db_shard)
    return @thread_conns.value = held
  end

  # Full reset: close everything, then start over with an empty table
  held.each_value(&shutdown)
  @thread_conns.value = {}
end
61
123
 
62
124
  # Health check - attempt to connect and immediately close.
63
125
  # Returns true if successful, raises on failure.
64
- def self.check_connection
126
+ def self.check_connection(db_shard: 0)
65
127
  conn = PG::Connection.new(
66
- host: ENV['OPENC3_TSDB_HOSTNAME'],
128
+ host: hostname_for_db_shard(db_shard),
67
129
  port: ENV['OPENC3_TSDB_QUERY_PORT'],
68
130
  user: ENV['OPENC3_TSDB_USERNAME'],
69
131
  password: ENV['OPENC3_TSDB_PASSWORD'],
@@ -291,14 +353,14 @@ module OpenC3
291
353
  # @param label [String, nil] Optional label for log messages
292
354
  # @return [PG::Result, nil] Query result
293
355
  # @raise [RuntimeError] After exhausting retries
294
- def self.query_with_retry(query, params: [], max_retries: 5, label: nil)
356
+ def self.query_with_retry(query, params: [], max_retries: 5, label: nil, db_shard: 0)
295
357
  retry_count = 0
296
358
  begin
297
- conn = connection
359
+ conn = connection(db_shard: db_shard)
298
360
  if params.empty?
299
- conn.exec(query)
361
+ return conn.exec(query)
300
362
  else
301
- conn.exec_params(query, params)
363
+ return conn.exec_params(query, params)
302
364
  end
303
365
  rescue IOError, PG::Error => e
304
366
  retry_count += 1
@@ -307,7 +369,7 @@ module OpenC3
307
369
  end
308
370
  Logger.warn("TSDB#{label ? " #{label}" : ""}: Retrying due to error: #{e.message}")
309
371
  Logger.warn("TSDB#{label ? " #{label}" : ""}: Last query: #{query}")
310
- disconnect
372
+ disconnect(db_shard: db_shard)
311
373
  sleep 0.1
312
374
  retry
313
375
  end
@@ -543,11 +605,11 @@ module OpenC3
543
605
  # @param start_time [Integer] Nanosecond start time
544
606
  # @param end_time [Integer, nil] Nanosecond end time
545
607
  # @return [Boolean]
546
- def self.table_has_data?(table_name, start_time, end_time)
547
- query = "SELECT 1 FROM #{table_name}"
608
+ def self.table_has_data?(table_name, start_time, end_time, db_shard: 0)
609
+ query = "SELECT 1 FROM \"#{table_name}\""
548
610
  query += time_where_clause(start_time, end_time)
549
611
  query += " LIMIT 1"
550
- result = query_with_retry(query, max_retries: 1, label: "table_has_data")
612
+ result = query_with_retry(query, max_retries: 1, label: "table_has_data", db_shard: db_shard)
551
613
  result && result.ntuples > 0
552
614
  rescue RuntimeError
553
615
  false
@@ -560,13 +622,13 @@ module OpenC3
560
622
  # @param page_size [Integer] Number of rows per page
561
623
  # @param label [String] Label for log messages
562
624
  # @yield [PG::Result] Each page of results
563
- def self.paginate_query(query, page_size, label:)
625
+ def self.paginate_query(query, page_size, label:, db_shard: 0)
564
626
  min = 0
565
627
  max = page_size
566
628
  loop do
567
629
  query_offset = "#{query} LIMIT #{min}, #{max}"
568
630
  Logger.debug("QuestDB #{label}: #{query_offset}")
569
- result = query_with_retry(query_offset, label: label)
631
+ result = query_with_retry(query_offset, label: label, db_shard: db_shard)
570
632
  min += page_size
571
633
  max += page_size
572
634
  if result.nil? or result.ntuples == 0
@@ -590,7 +652,7 @@ module OpenC3
590
652
  names << TIMESTAMP_SELECT
591
653
  names << "RECEIVED_TIMESECONDS" if include_received_ts
592
654
  names << "COSMOS_EXTRA"
593
- query = "SELECT #{names.join(', ')} FROM #{table_name}"
655
+ query = "SELECT #{names.join(', ')} FROM \"#{table_name}\""
594
656
  query += time_where_clause(start_time, end_time)
595
657
  query
596
658
  end
@@ -808,6 +870,8 @@ module OpenC3
808
870
 
809
871
  # Query historical telemetry data from QuestDB for a list of items.
810
872
  # Builds the SQL query, executes it, and decodes all results.
873
+ # Supports cross-db_shard queries by grouping items by db_shard, executing
874
+ # separate queries per db_shard, and merging results positionally.
811
875
  #
812
876
  # @param items [Array] Array of [target_name, packet_name, item_name, value_type, limits]
813
877
  # item_name may be nil to indicate a placeholder (non-existent item)
@@ -817,6 +881,78 @@ module OpenC3
817
881
  # @return [Array, Hash] Array of [value, limits_state] pairs per row, or {} if no results.
818
882
  # Single-row results return a flat array; multi-row results return array of arrays.
819
883
  def self.tsdb_lookup(items, start_time:, end_time: nil, scope: "DEFAULT")
884
+ # Group items by db_shard number while preserving their original positions
885
+ db_shard_groups = {} # db_shard => { positions: [], items: [] }
886
+ items.each_with_index do |item, pos|
887
+ target_name = item[0]
888
+ db_shard = db_shard_for_target(target_name, scope: scope)
889
+ db_shard_groups[db_shard] ||= { positions: [], items: [] }
890
+ db_shard_groups[db_shard][:positions] << pos
891
+ db_shard_groups[db_shard][:items] << item
892
+ end
893
+
894
+ # Single-db_shard fast path (most common case)
895
+ if db_shard_groups.length == 1
896
+ db_shard, group = db_shard_groups.first
897
+ return tsdb_lookup_single_db_shard(group[:items], start_time: start_time, end_time: end_time, scope: scope, db_shard: db_shard)
898
+ end
899
+
900
+ # Cross-db_shard: execute per-db_shard queries and merge results
901
+ db_shard_results = {} # db_shard => data
902
+ db_shard_groups.each do |db_shard, group|
903
+ result = tsdb_lookup_single_db_shard(group[:items], start_time: start_time, end_time: end_time, scope: scope, db_shard: db_shard)
904
+ db_shard_results[db_shard] = result
905
+ end
906
+
907
+ # If all db_shards returned empty, return empty
908
+ return {} if db_shard_results.values.all? { |r| r == {} }
909
+
910
+ # Merge results positionally back into the original item order.
911
+ # For single-row results (no end_time), merge flat arrays.
912
+ # For multi-row results, each db_shard may have different row counts;
913
+ # use the maximum row count and fill missing positions with [nil, nil].
914
+ if !end_time
915
+ # Single-row mode: each db_shard returns a flat array of [value, limits] pairs.
916
+ # Merge them into the original item order.
917
+ merged = Array.new(items.length) { [nil, nil] }
918
+ db_shard_groups.each do |db_shard, group|
919
+ result = db_shard_results[db_shard]
920
+ next if result == {} || !result.is_a?(Array)
921
+ group[:positions].each_with_index do |orig_pos, db_shard_idx|
922
+ merged[orig_pos] = result[db_shard_idx] if result[db_shard_idx]
923
+ end
924
+ end
925
+ merged
926
+ else
927
+ # Multi-row mode: find max row count across db_shards
928
+ max_rows = 0
929
+ db_shard_groups.each do |db_shard, _group|
930
+ result = db_shard_results[db_shard]
931
+ next if result == {}
932
+ count = result.is_a?(Array) ? result.length : 0
933
+ max_rows = count if count > max_rows
934
+ end
935
+ return {} if max_rows == 0
936
+
937
+ merged = Array.new(max_rows) { Array.new(items.length) { [nil, nil] } }
938
+ db_shard_groups.each do |db_shard, group|
939
+ result = db_shard_results[db_shard]
940
+ next if result == {}
941
+ rows = result.is_a?(Array) ? result : []
942
+ rows.each_with_index do |row, row_num|
943
+ next unless row.is_a?(Array)
944
+ group[:positions].each_with_index do |orig_pos, db_shard_idx|
945
+ merged[row_num][orig_pos] = row[db_shard_idx] if row[db_shard_idx]
946
+ end
947
+ end
948
+ end
949
+ merged
950
+ end
951
+ end
952
+
953
+ # Execute a tsdb_lookup query against a single db_shard.
954
+ # This contains the original ASOF JOIN logic for items all on the same QuestDB instance.
955
+ def self.tsdb_lookup_single_db_shard(items, start_time:, end_time: nil, scope: "DEFAULT", db_shard: 0)
820
956
  tables = {}
821
957
  names = []
822
958
  nil_count = 0
@@ -888,9 +1024,9 @@ module OpenC3
888
1024
  query = "SELECT #{names.join(", ")} FROM "
889
1025
  tables.each_with_index do |(table_name, _), index|
890
1026
  if index == 0
891
- query += "#{table_name} as T#{index} "
1027
+ query += "\"#{table_name}\" as T#{index} "
892
1028
  else
893
- query += "ASOF JOIN #{table_name} as T#{index} "
1029
+ query += "ASOF JOIN \"#{table_name}\" as T#{index} "
894
1030
  end
895
1031
  end
896
1032
  query_params = []
@@ -903,7 +1039,7 @@ module OpenC3
903
1039
  query_params << end_time
904
1040
  end
905
1041
 
906
- result = query_with_retry(query, params: query_params, label: "tsdb_lookup")
1042
+ result = query_with_retry(query, params: query_params, label: "tsdb_lookup", db_shard: db_shard)
907
1043
  if result.nil? or result.ntuples == 0
908
1044
  return {}
909
1045
  end
@@ -0,0 +1,377 @@
1
+ # encoding: ascii-8bit
2
+
3
+ # Copyright 2026 OpenC3, Inc.
4
+ # All Rights Reserved.
5
+ #
6
+ # This program is distributed in the hope that it will be useful,
7
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
8
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9
+ # See LICENSE.md for more details.
10
+ #
11
+ # This file may also be used under the terms of a commercial license
12
+ # if purchased from OpenC3, Inc.
13
+
14
+ require 'fileutils'
15
+ require 'tmpdir'
16
+ require 'openc3/system/system'
17
+ require 'openc3/utilities/bucket'
18
+ require 'openc3/utilities/bucket_utilities'
19
+ require 'openc3/utilities/logger'
20
+ require 'openc3/utilities/questdb_client'
21
+ require 'openc3/logs/packet_log_reader'
22
+ require 'openc3/microservices/decom_common'
23
+ require 'openc3/models/reingest_job_model'
24
+
25
module OpenC3
  # Raised for failures detected by ReingestJob itself: missing job model,
  # unparseable path, unknown bucket, a filename that escapes the temp dir,
  # or a table name that cannot be quoted safely.
  class ReingestJobError < StandardError; end

  # Replays raw .bin.gz log files from a bucket, decommutating each packet via
  # DecomCommon.decom_and_publish(check_limits: false) so historical data
  # reaches QuestDB without re-firing limits events.
  #
  # Runs synchronously (caller wraps in a Thread). Tracks state in a
  # ReingestJobModel. DEDUP is enabled on affected tables during the job and
  # disabled again on completion (after a cooldown window so in-flight WAL
  # commits are covered) or during crash cleanup.
  #
  # target_version:
  #   - 'as_logged' (default): each file is decoded with the target config hash
  #     that was in effect when the packets were originally logged. Files are
  #     grouped by their embedded target_id and System is rebuilt per group.
  #   - 'current': all files are decoded with the latest target config.
  #   - <hash>: explicit hash, used for every file in the job.
  class ReingestJob
    # How often to persist progress during the ingest pass (write every N packets)
    STATUS_UPDATE_EVERY = 500
    # How often to tick the heartbeat during the cooldown sleep
    HEARTBEAT_INTERVAL_SEC = 10

    # Reingest rebuilds the process-global System singleton. Serialize all
    # reingest jobs running in this process so they don't stomp each other.
    @@run_mutex = Mutex.new

    # @param job_id [String] Name of the ReingestJobModel tracking this job
    # @param files [Array<String>] Filenames (relative to path) to reingest
    # @param path [String] Bucket key prefix, e.g. "DEFAULT/raw_logs/tlm/INST/20260421/".
    #   It is concatenated directly with each filename, so it must end with '/'.
    # @param bucket [String] Name of the ENV variable holding the bucket name
    # @param scope [String] Scope the job runs in
    # @param target_version [String, nil] 'as_logged', 'current', or an explicit hash
    # @param dedup_cooldown_seconds [Integer] Seconds to keep DEDUP on after ingest
    # @param logger [#warn, #error] Destination for log messages
    def initialize(job_id:, files:, path:, bucket:, scope:,
                   target_version: 'as_logged',
                   dedup_cooldown_seconds: ENV.fetch('OPENC3_REINGEST_DEDUP_COOLDOWN', 60).to_i,
                   logger: Logger)
      @job_id = job_id
      @files = files
      @path = path
      @bucket_env = bucket
      @scope = scope
      @target_version = target_version
      @dedup_cooldown_seconds = dedup_cooldown_seconds
      @logger = logger
    end

    # Execute the whole job: download, enable DEDUP, ingest, cooldown, disable
    # DEDUP. Failures inside the job are recorded on the model (state
    # 'Crashed') rather than raised, except for process-exit requests which are
    # re-raised after cleanup.
    #
    # @raise [ReingestJobError] if the job model does not exist
    def run
      # Load the model before allocating anything: if it is missing there is
      # nothing to mark as crashed and no temp directory to leak.
      job = load_job
      tmp_dir = nil
      dedup_enabled_by_us = []
      db_shard = 0
      @@run_mutex.synchronize do
        begin
          tmp_dir = Dir.mktmpdir
          mark(job, state: 'Running', progress_phase: 'downloading',
                    started_at: Time.now.utc.iso8601,
                    progress_total: @files.length)

          # Parse target from path, e.g. "DEFAULT/raw_logs/tlm/INST/20260421/"
          # → "INST". Fail fast if the path doesn't encode one — otherwise
          # ingest would run against whatever System was loaded in this process
          # from a prior job (or raise opaquely inside PacketLogReader), and
          # the job could mark Complete with rows written under the wrong
          # target config.
          path_parts = @path.to_s.split('/').reject(&:empty?)
          unless path_parts.length >= 4 && path_parts[1] == 'raw_logs'
            raise ReingestJobError, "Cannot determine target from path '#{@path}'; expected '{scope}/raw_logs/{tlm|cmd}/{target}/'"
          end
          target = path_parts[3]
          db_shard = QuestDBClient.db_shard_for_target(target, scope: @scope)

          local_files = download_and_uncompress(job, tmp_dir)

          # Pass 1: read raw (no System required) to discover table names and
          # each file's embedded target hash. File hashes are what the "as
          # logged" mode uses to pick the right target_version per file.
          mark(job, progress_phase: 'enabling_dedup', progress_current: 0,
                    progress_total: 0)
          table_names, file_versions = discover_tables_and_versions(local_files)
          mark(job, table_names: table_names, progress_total: table_names.length)

          dedup_enabled_by_us, preexisting = enable_dedup(job, table_names, db_shard)
          mark(job,
               dedup_enabled_by_us: dedup_enabled_by_us,
               dedup_preexisting: preexisting,
               dedup_enabled_at: Time.now.utc.iso8601)

          # Pass 2: group files by the target_version we'll load for them,
          # then ingest each group under its own System instance.
          groups = group_files_by_version(local_files, file_versions)
          mark(job, versions_used: groups.keys,
                    progress_phase: 'ingesting', progress_current: 0,
                    progress_total: 0, packets_written: 0)
          ingest_all_groups(job, groups, target)

          mark(job, progress_phase: 'dedup_cooldown')
          cooldown(job)

          mark(job, progress_phase: 'disabling_dedup')
          disabled = disable_dedup(job, dedup_enabled_by_us, db_shard)
          mark(job, dedup_disabled_tables: disabled,
                    dedup_disabled_at: Time.now.utc.iso8601,
                    state: 'Complete',
                    finished_at: Time.now.utc.iso8601)
        rescue Exception => e # NOSONAR - deliberately broad so DEDUP is always reverted
          @logger.error("Reingest job #{@job_id} failed: #{e.message}\n#{Array(e.backtrace).first(10).join("\n")}")
          # Always try to revert DEDUP even on crash so user tables are not left altered
          disabled_on_crash = []
          begin
            disabled_on_crash = disable_dedup(job, dedup_enabled_by_us, db_shard)
          rescue => de
            @logger.error("Reingest job #{@job_id} failed to disable DEDUP during crash cleanup: #{de.message}")
          end
          mark(job,
               dedup_disabled_tables: disabled_on_crash,
               dedup_disabled_at: Time.now.utc.iso8601,
               state: 'Crashed',
               error: e.message,
               finished_at: Time.now.utc.iso8601)
          # Cleanup is done; do not swallow a request to terminate the process.
          raise if e.is_a?(SystemExit) || e.is_a?(SignalException)
        ensure
          FileUtils.remove_entry_secure(tmp_dir, true) if tmp_dir && File.directory?(tmp_dir)
        end
      end
    end

    private

    # @return [Object] the job model returned by ReingestJobModel.get_model
    # @raise [ReingestJobError] if no model exists for this job id and scope
    def load_job
      ReingestJobModel.get_model(name: @job_id, scope: @scope) or
        raise ReingestJobError, "ReingestJobModel #{@job_id} not found in scope #{@scope}"
    end

    # Merge attrs into the model and persist. Model#update refreshes updated_at,
    # which doubles as the heartbeat used by the stale-check.
    def mark(job, **attrs)
      attrs.each { |k, v| job.send("#{k}=", v) }
      job.update
    end

    # Download every requested file into tmp_dir, uncompressing .gz files, and
    # record per-file progress on the job.
    #
    # @return [Array<String>] local paths of the files ready to be read
    # @raise [ReingestJobError] on an unknown bucket or a filename escaping tmp_dir
    def download_and_uncompress(job, tmp_dir)
      bucket_name = ENV.fetch(@bucket_env) { |name| raise ReingestJobError, "Unknown bucket #{name}" }
      bucket_client = Bucket.getClient()
      local_files = []
      tmp_root = File.expand_path(tmp_dir) + File::SEPARATOR
      @files.each_with_index do |filename, i|
        key = "#{@path}#{filename}"
        temp_file = File.expand_path(File.join(tmp_dir, filename))
        # Defense-in-depth: the controller validates filenames, but refuse to
        # write outside tmp_dir if any caller (tests, future callers) bypasses it.
        unless temp_file.start_with?(tmp_root)
          raise ReingestJobError, "Invalid filename escapes tmp dir: #{filename}"
        end
        FileUtils.mkdir_p(File.dirname(temp_file))
        bucket_client.get_object(bucket: bucket_name, key: key, path: temp_file)
        if File.extname(filename) == '.gz'
          decompressed = BucketUtilities.uncompress_file(temp_file)
          File.delete(temp_file)
          local_files << decompressed
        else
          local_files << temp_file
        end
        mark(job, progress_current: i + 1)
      end
      local_files
    end

    # Read each file in raw mode (identify_and_define=false) to collect
    # {scope}__{CMD|TLM}__{target}__{packet} table names and the first target
    # declaration hash embedded in each file. Returns [table_names, file_versions]
    # where file_versions maps local_file_path → hex hash (or nil if the file
    # has no hash, which happens for pre-6.x log files).
    def discover_tables_and_versions(local_files)
      # A Hash is used as an insertion-ordered set so this file does not rely
      # on 'set' having been loaded elsewhere.
      seen_tables = {}
      file_versions = {}
      local_files.each do |local_file|
        reader = PacketLogReader.new
        reader.each(local_file, false) do |packet|
          next unless packet.target_name && packet.packet_name
          cmd_or_tlm = packet.cmd_or_tlm == :CMD ? 'CMD' : 'TLM'
          seen_tables["#{@scope}__#{cmd_or_tlm}__#{packet.target_name}__#{packet.packet_name}"] = true
        end
        # NOTE(review): reaches into PacketLogReader's private @target_ids;
        # a public accessor would be safer if one exists.
        ids = reader.instance_variable_get(:@target_ids) || []
        file_versions[local_file] = ids.first ? ids.first.unpack1('H*') : nil
      end
      [seen_tables.keys, file_versions]
    end

    # Returns a Hash of target_version → [local_file, ...]. The value at key
    # 'current' means "use System with the latest config"; any other value is
    # a specific hash string used as target_version in System.setup_targets.
    def group_files_by_version(local_files, file_versions)
      groups = Hash.new { |h, k| h[k] = [] }
      case @target_version
      when 'current'
        groups['current'] = local_files.dup
      when 'as_logged', nil
        local_files.each do |file|
          version = file_versions[file] || 'current'
          groups[version] << file
        end
      else
        # Caller passed an explicit hash; use it for every file.
        groups[@target_version] = local_files.dup
      end
      groups
    end

    # For each version group, rebuild System under that version and ingest
    # the group's files. `@@run_mutex` in `run` protects other threads from
    # seeing a transient nil @@instance.
    #
    # If the requested target archive (a specific hash) is missing from the
    # config bucket — which happens in dev setups where every `openc3.sh start`
    # regenerates the target archive with a fresh timestamp-appended gem
    # version — we fall back to 'current' and record a warning on the job so
    # the UI can surface it. This matters because the old historical archive
    # the log file references may no longer exist.
    def ingest_all_groups(job, groups, target)
      packets_written = 0
      last_status_at = 0
      warnings = (job.warnings || []).dup
      groups.each do |version, files|
        resolved = load_system_with_fallback(target, version, warnings)
        unless resolved
          # Even the 'current' fallback failed; skip this group rather than
          # publish empty json_data for every packet.
          mark(job, warnings: warnings)
          next
        end
        mark(job, warnings: warnings) if warnings.any?
        files.each do |file|
          packets_written, last_status_at = ingest_file(job, file, packets_written, last_status_at)
        end
      end
      mark(job, packets_written: packets_written, warnings: warnings)
    end

    # Returns the target_version that was actually loaded, or nil if even the
    # 'current' fallback failed. Appends human-readable entries to `warnings`
    # for any fallback or failure. Re-raises when 'current' itself was the
    # requested version and could not be loaded.
    def load_system_with_fallback(target, version, warnings)
      begin
        load_system(target, version)
        return version
      rescue => e
        if version == 'current'
          # Caller explicitly requested 'current' and that failed; no further
          # fallback exists — propagate so the outer rescue marks Crashed.
          raise
        end
        @logger.warn("Reingest job #{@job_id}: target archive for #{target} version '#{version}' unavailable (#{e.class}: #{e.message}); falling back to 'current'")
        warnings << "Version '#{version}' archive missing; used 'current' instead"
      end

      begin
        load_system(target, 'current')
        'current'
      rescue => e
        @logger.error("Reingest job #{@job_id}: fallback to 'current' also failed: #{e.class}: #{e.message}")
        warnings << "Version '#{version}' archive missing and 'current' also failed (#{e.message})"
        nil
      end
    end

    # Discard the current System singleton and set it up again for `target`
    # at `version`, using the system temp directory as the working directory.
    def load_system(target, version)
      System.reset_instance!
      System.setup_targets([target], Dir.tmpdir, scope: @scope, target_version: version)
    end

    # Decommutate and publish every identified packet in local_file as stored
    # data with limits checking off, persisting the running packet count every
    # STATUS_UPDATE_EVERY packets.
    #
    # @return [Array(Integer, Integer)] updated [packets_written, last_status_at]
    def ingest_file(job, local_file, packets_written, last_status_at)
      reader = PacketLogReader.new
      reader.each(local_file, true) do |packet|
        next unless packet.target_name && packet.packet_name
        packet.stored = true
        DecomCommon.decom_and_publish(
          packet,
          scope: @scope,
          target_names: [packet.target_name],
          logger: @logger,
          name: "REINGEST:#{@job_id}",
          check_limits: false,
        )
        packets_written += 1
        if packets_written - last_status_at >= STATUS_UPDATE_EVERY
          mark(job, packets_written: packets_written)
          last_status_at = packets_written
        end
      end
      [packets_written, last_status_at]
    end

    # Returns [enabled_by_us, preexisting]. Only tables we enable are recorded
    # in enabled_by_us; pre-existing DEDUP tables are left untouched on teardown.
    # A failure on one table is logged and does not stop the others.
    def enable_dedup(job, table_names, db_shard)
      enabled_by_us = []
      preexisting = []
      # Nothing to alter: do not open a database connection at all
      return [enabled_by_us, preexisting] if table_names.empty?

      conn = QuestDBClient.connection(db_shard: db_shard)
      table_names.each_with_index do |table_name, i|
        begin
          already = dedup_already_enabled?(conn, table_name)
          if already
            preexisting << table_name
          else
            conn.exec("ALTER TABLE #{quoted_table_name(table_name)} DEDUP ENABLE UPSERT KEYS(PACKET_TIMESECONDS)")
            enabled_by_us << table_name
          end
        rescue => e
          @logger.warn("Failed to enable DEDUP on #{table_name}: #{e.message}")
        end
        mark(job, progress_current: i + 1)
      end
      [enabled_by_us, preexisting]
    end

    # QuestDB exposes per-table dedup status via tables() function.
    # Falls back to false (treat as not-enabled, will issue ALTER) on any error.
    def dedup_already_enabled?(conn, table_name)
      result = conn.exec_params(
        "SELECT dedup FROM tables() WHERE table_name = $1",
        [table_name],
      )
      return false if result.ntuples == 0
      value = result[0]['dedup']
      value == true || value == 't' || value.to_s.downcase == 'true'
    rescue => e
      @logger.warn("Could not query DEDUP status for #{table_name}: #{e.message}")
      false
    end

    # Sleep dedup_cooldown_seconds, ticking the heartbeat so the stale-check
    # doesn't misfire during the wait. This gives the Python TsdbMicroservice
    # and QuestDB WAL time to commit reingested rows while DEDUP is still on.
    def cooldown(job)
      remaining = @dedup_cooldown_seconds
      while remaining > 0
        step = [HEARTBEAT_INTERVAL_SEC, remaining].min
        sleep(step)
        remaining -= step
        mark(job) # heartbeat only
      end
    end

    # Turn DEDUP back off on the given tables. A failure on one table is
    # logged and does not stop the others.
    #
    # @return [Array<String>] tables on which DEDUP was actually disabled
    def disable_dedup(job, tables, db_shard)
      disabled = []
      # Nothing to revert: skip the connection so crash cleanup for a job that
      # never touched the database cannot fail on (or be blamed on) it.
      return disabled if tables.empty?

      conn = QuestDBClient.connection(db_shard: db_shard)
      tables.each_with_index do |table_name, i|
        begin
          conn.exec("ALTER TABLE #{quoted_table_name(table_name)} DEDUP DISABLE")
          disabled << table_name
        rescue => e
          @logger.warn("Failed to disable DEDUP on #{table_name}: #{e.message}")
        end
        mark(job, progress_current: i + 1, progress_total: tables.length)
      end
      disabled
    end

    # Wrap a table name in single quotes for use in an ALTER TABLE statement.
    # Table names are derived from log file contents, so a name containing the
    # quote delimiter is refused rather than spliced into SQL.
    #
    # @raise [ReingestJobError] if the name contains a single quote
    def quoted_table_name(table_name)
      name = table_name.to_s
      if name.include?("'")
        raise ReingestJobError, "Refusing to alter table with unsafe name #{name.inspect}"
      end
      "'#{name}'"
    end
  end
end
@@ -16,7 +16,9 @@
16
16
  # if purchased from OpenC3, Inc.
17
17
 
18
18
  require 'irb/ruby-lex'
19
+ require 'ripper'
19
20
  require 'prism'
21
+ require 'ripper'
20
22
 
21
23
  class RubyLexUtils
22
24
  OPENING_DELIMITER_TYPES = %i(PARENTHESIS_LEFT BRACKET_LEFT BRACE_LEFT BRACKET_LEFT_ARRAY)