solid_log-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +176 -0
  4. data/Rakefile +11 -0
  5. data/db/log_migrate/20251222000001_create_solid_log_raw.rb +15 -0
  6. data/db/log_migrate/20251222000002_create_solid_log_entries.rb +29 -0
  7. data/db/log_migrate/20251222000004_create_solid_log_fields.rb +17 -0
  8. data/db/log_migrate/20251222000005_create_solid_log_tokens.rb +13 -0
  9. data/db/log_migrate/20251222000006_create_solid_log_facet_cache.rb +13 -0
  10. data/db/log_migrate/20251222000007_create_solid_log_fts_triggers.rb +41 -0
  11. data/db/log_structure_mysql.sql +96 -0
  12. data/db/log_structure_postgresql.sql +118 -0
  13. data/db/log_structure_sqlite.sql +123 -0
  14. data/lib/generators/solid_log/install/install_generator.rb +134 -0
  15. data/lib/generators/solid_log/install/templates/solid_log.rb.tt +133 -0
  16. data/lib/solid_log/adapters/adapter_factory.rb +34 -0
  17. data/lib/solid_log/adapters/base_adapter.rb +88 -0
  18. data/lib/solid_log/adapters/mysql_adapter.rb +163 -0
  19. data/lib/solid_log/adapters/postgresql_adapter.rb +141 -0
  20. data/lib/solid_log/adapters/sqlite_adapter.rb +149 -0
  21. data/lib/solid_log/core/client/buffer.rb +112 -0
  22. data/lib/solid_log/core/client/configuration.rb +31 -0
  23. data/lib/solid_log/core/client/http.rb +89 -0
  24. data/lib/solid_log/core/client/lograge_formatter.rb +99 -0
  25. data/lib/solid_log/core/client/retry_handler.rb +48 -0
  26. data/lib/solid_log/core/client.rb +138 -0
  27. data/lib/solid_log/core/configuration.rb +60 -0
  28. data/lib/solid_log/core/services/correlation_service.rb +74 -0
  29. data/lib/solid_log/core/services/field_analyzer.rb +108 -0
  30. data/lib/solid_log/core/services/health_service.rb +151 -0
  31. data/lib/solid_log/core/services/retention_service.rb +72 -0
  32. data/lib/solid_log/core/services/search_service.rb +269 -0
  33. data/lib/solid_log/core/version.rb +5 -0
  34. data/lib/solid_log/core.rb +106 -0
  35. data/lib/solid_log/direct_logger.rb +197 -0
  36. data/lib/solid_log/models/entry.rb +185 -0
  37. data/lib/solid_log/models/facet_cache.rb +58 -0
  38. data/lib/solid_log/models/field.rb +100 -0
  39. data/lib/solid_log/models/raw_entry.rb +33 -0
  40. data/lib/solid_log/models/record.rb +5 -0
  41. data/lib/solid_log/models/token.rb +61 -0
  42. data/lib/solid_log/parser.rb +179 -0
  43. data/lib/solid_log/silence_middleware.rb +34 -0
  44. data/lib/solid_log-core.rb +2 -0
  45. metadata +244 -0
data/lib/solid_log/core/configuration.rb
@@ -0,0 +1,60 @@
+ module SolidLog
+   module Core
+     class Configuration
+       attr_accessor :database_url,
+                     :retention_days,
+                     :error_retention_days,
+                     :max_batch_size,
+                     :parser_batch_size,
+                     :parser_concurrency,
+                     :auto_promote_fields,
+                     :field_promotion_threshold,
+                     :facet_cache_ttl,
+                     :live_tail_mode
+
+       def initialize
+         # Load from ENV vars with defaults
+         @database_url = ENV["SOLIDLOG_DATABASE_URL"] || ENV["DATABASE_URL"]
+         @retention_days = env_to_int("SOLIDLOG_RETENTION_DAYS", 30)
+         @error_retention_days = env_to_int("SOLIDLOG_ERROR_RETENTION_DAYS", 90)
+         @max_batch_size = env_to_int("SOLIDLOG_MAX_BATCH_SIZE", 1000) # For API ingestion
+         @parser_batch_size = env_to_int("SOLIDLOG_PARSER_BATCH_SIZE", 200) # Number of raw entries to parse per job
+         @parser_concurrency = env_to_int("SOLIDLOG_PARSER_CONCURRENCY", 5)
+         @auto_promote_fields = env_to_bool("SOLIDLOG_AUTO_PROMOTE_FIELDS", false)
+         @field_promotion_threshold = env_to_int("SOLIDLOG_FIELD_PROMOTION_THRESHOLD", 1000)
+         @facet_cache_ttl = env_to_int("SOLIDLOG_FACET_CACHE_TTL", 300) # seconds (5 minutes)
+         @live_tail_mode = env_to_symbol("SOLIDLOG_LIVE_TAIL_MODE", :disabled) # :websocket, :polling, or :disabled
+       end
+
+       # Check if database is configured
+       def database_configured?
+         database_url.present?
+       end
+
+       # Get cache TTL in seconds
+       def cache_ttl_seconds
+         facet_cache_ttl.to_i
+       end
+
+       private
+
+       def env_to_int(key, default = nil)
+         value = ENV[key]
+         return default if value.nil? || value.empty?
+         value.to_i
+       end
+
+       def env_to_bool(key, default = false)
+         value = ENV[key]
+         return default if value.nil? || value.empty?
+         ["true", "1", "yes", "on"].include?(value.downcase)
+       end
+
+       def env_to_symbol(key, default = nil)
+         value = ENV[key]
+         return default if value.nil? || value.empty?
+         value.to_sym
+       end
+     end
+   end
+ end
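
Usage sketch (illustrative, not part of the package diff): assuming solid_log-core is required and the Configuration class above is loaded, ENV-driven configuration could look like this:

    # Hypothetical example -- the ENV values are illustrative.
    ENV["SOLIDLOG_RETENTION_DAYS"] = "14"
    ENV["SOLIDLOG_AUTO_PROMOTE_FIELDS"] = "true"

    config = SolidLog::Core::Configuration.new
    config.retention_days       # => 14
    config.auto_promote_fields  # => true
    config.live_tail_mode       # => :disabled (default when the ENV var is unset)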
data/lib/solid_log/core/services/correlation_service.rb
@@ -0,0 +1,74 @@
+ module SolidLog
+   module Core
+     class CorrelationService
+       # Get all entries for a specific request ID in timeline order
+       def self.request_timeline(request_id)
+         return Entry.none if request_id.blank?
+
+         Entry.by_request_id(request_id).recent
+       end
+
+       # Get all entries for a specific job ID in timeline order
+       def self.job_timeline(job_id)
+         return Entry.none if job_id.blank?
+
+         Entry.by_job_id(job_id).recent
+       end
+
+       # Get correlation stats for a request
+       def self.request_stats(request_id)
+         entries = request_timeline(request_id)
+
+         {
+           total_entries: entries.count,
+           duration: calculate_duration(entries),
+           levels: entries.group(:level).count,
+           first_timestamp: entries.first&.timestamp,
+           last_timestamp: entries.last&.timestamp
+         }
+       end
+
+       # Get correlation stats for a job
+       def self.job_stats(job_id)
+         entries = job_timeline(job_id)
+
+         {
+           total_entries: entries.count,
+           duration: calculate_duration(entries),
+           levels: entries.group(:level).count,
+           first_timestamp: entries.first&.timestamp,
+           last_timestamp: entries.last&.timestamp
+         }
+       end
+
+       # Find related entries (same request_id or job_id)
+       def self.find_related(entry)
+         related = []
+
+         if entry.request_id.present?
+           related += request_timeline(entry.request_id).where.not(id: entry.id).to_a
+         end
+
+         if entry.job_id.present?
+           related += job_timeline(entry.job_id).where.not(id: entry.id).to_a
+         end
+
+         related.uniq.sort_by(&:timestamp)
+       end
+
+       private
+
+       # Calculate total duration from first to last entry
+       def self.calculate_duration(entries)
+         return nil if entries.count < 2
+
+         first = entries.first
+         last = entries.last
+
+         return nil unless first&.timestamp && last&.timestamp
+
+         ((last.timestamp - first.timestamp) * 1000).round # milliseconds
+       end
+     end
+   end
+ end
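
A hedged usage sketch for the service above (the "req-abc123" ID is illustrative; the Entry scopes it relies on, by_request_id and recent, live in data/lib/solid_log/models/entry.rb, outside this hunk):

    # Hypothetical example.
    timeline = SolidLog::Core::CorrelationService.request_timeline("req-abc123")
    stats = SolidLog::Core::CorrelationService.request_stats("req-abc123")
    stats[:duration] # => request span in milliseconds, or nil with fewer than 2 entries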
data/lib/solid_log/core/services/field_analyzer.rb
@@ -0,0 +1,108 @@
+ module SolidLog
+   module Core
+     class FieldAnalyzer
+       # Analyze fields and return promotion recommendations
+       def self.analyze(threshold: 1000)
+         recommendations = []
+
+         # Get hot fields that are not yet promoted
+         hot_fields = Field
+           .hot_fields(threshold)
+           .unpromoted
+           .recently_seen(30) # Only consider fields seen in last 30 days
+
+         hot_fields.each do |field|
+           # Calculate priority score
+           priority = calculate_priority(field, threshold)
+
+           recommendations << {
+             field: field,
+             priority: priority,
+             reason: promotion_reason(field, threshold)
+           }
+         end
+
+         # Sort by priority (highest first)
+         recommendations.sort_by { |rec| -rec[:priority] }
+       end
+
+       # Auto-promote fields that meet the threshold
+       def self.auto_promote_candidates(threshold: 1000)
+         candidates = analyze(threshold: threshold)
+         promoted_count = 0
+
+         candidates.each do |candidate|
+           field = candidate[:field]
+
+           # Only auto-promote fields with high priority
+           if candidate[:priority] >= 80
+             Rails.logger.info "FieldAnalyzer: Auto-promoting field '#{field.name}' (usage: #{field.usage_count}, priority: #{candidate[:priority]})"
+
+             field.promote!
+             promoted_count += 1
+
+             # TODO: In real implementation, we would need to:
+             # 1. Generate migration to add column
+             # 2. Backfill existing data
+             # 3. Update queries to use promoted field
+             # For now, just mark as promoted
+           end
+         end
+
+         promoted_count
+       end
+
+       private
+
+       # Calculate promotion priority (0-100 scale)
+       def self.calculate_priority(field, threshold)
+         score = 0
+
+         # Usage score (0-50 points)
+         usage_score = [50, (field.usage_count.to_f / (threshold * 10) * 50).to_i].min
+         score += usage_score
+
+         # Recency score (0-25 points)
+         days_since_seen = (Time.current - field.last_seen_at) / 1.day
+         recency_score = [0, [25, (25 - days_since_seen).to_i].min].max
+         score += recency_score
+
+         # Type score (0-25 points)
+         # Favor simple types that are easier to index
+         type_score = case field.field_type
+                      when "string", "number", "boolean"
+                        25
+                      when "datetime"
+                        20
+                      else
+                        10
+                      end
+         score += type_score
+
+         [100, score].min
+       end
+
+       # Generate human-readable reason for promotion
+       def self.promotion_reason(field, threshold)
+         reasons = []
+
+         if field.usage_count >= threshold * 10
+           reasons << "extremely high usage (#{field.usage_count})"
+         elsif field.usage_count >= threshold * 5
+           reasons << "very high usage (#{field.usage_count})"
+         else
+           reasons << "high usage (#{field.usage_count})"
+         end
+
+         days_since_seen = (Time.current - field.last_seen_at) / 1.day
+         if days_since_seen < 1
+           reasons << "actively used today"
+         elsif days_since_seen < 7
+           reasons << "recently active"
+         end
+
+         reasons.join(", ")
+       end
+     end
+   end
+ end
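
Worked example of the scoring above (numbers illustrative): with the default threshold of 1000, a "string" field used 5,000 times and last seen 2 days ago scores 25 + 23 + 25 = 73, which falls below the auto-promote cutoff of 80:

    # Mirrors calculate_priority with hypothetical inputs.
    usage_score   = [50, (5_000.0 / (1000 * 10) * 50).to_i].min  # => 25
    recency_score = [0, [25, (25 - 2).to_i].min].max             # => 23
    type_score    = 25                                           # "string"
    usage_score + recency_score + type_score                     # => 73 (< 80, not auto-promoted)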
data/lib/solid_log/core/services/health_service.rb
@@ -0,0 +1,151 @@
+ module SolidLog
+   module Core
+     class HealthService
+       # Get comprehensive health metrics
+       def self.metrics
+         {
+           ingestion: ingestion_metrics,
+           parsing: parsing_metrics,
+           storage: storage_metrics,
+           performance: performance_metrics
+         }
+       end
+
+       # Ingestion metrics
+       def self.ingestion_metrics
+         today_start = Time.current.beginning_of_day
+         hour_ago = 1.hour.ago
+
+         {
+           total_raw: RawEntry.count,
+           today_raw: RawEntry.where("received_at >= ?", today_start).count,
+           last_hour_raw: RawEntry.where("received_at >= ?", hour_ago).count,
+           last_ingestion: RawEntry.order(received_at: :desc).first&.received_at
+         }
+       end
+
+       # Parsing metrics
+       def self.parsing_metrics
+         unparsed_count = RawEntry.unparsed.count
+         total_raw = RawEntry.count
+         stale_threshold = 1.hour.ago
+
+         backlog_percentage = total_raw > 0 ? (unparsed_count.to_f / total_raw * 100).round(2) : 0
+         stale_unparsed = RawEntry.unparsed.where("received_at < ?", stale_threshold).count
+
+         health_status = case
+                         when backlog_percentage > 50
+                           "critical"
+                         when backlog_percentage > 20
+                           "warning"
+                         when stale_unparsed > 100
+                           "degraded"
+                         else
+                           "healthy"
+                         end
+
+         {
+           unparsed_count: unparsed_count,
+           parse_backlog_percentage: backlog_percentage,
+           stale_unparsed: stale_unparsed,
+           health_status: health_status
+         }
+       end
+
+       # Storage metrics
+       def self.storage_metrics
+         promoted_fields = Field.promoted.count
+         hot_fields = Field.hot_fields(1000).count
+
+         {
+           total_entries: Entry.count,
+           total_fields: Field.count,
+           promoted_fields: promoted_fields,
+           hot_fields_count: hot_fields,
+           database_size: database_size
+         }
+       end
+
+       # Performance metrics
+       def self.performance_metrics
+         hour_ago = 1.hour.ago
+         hour_entries = Entry.where("timestamp >= ?", hour_ago)
+
+         error_count = hour_entries.errors.count
+         total_count = hour_entries.count
+         error_rate = total_count > 0 ? (error_count.to_f / total_count * 100).round(2) : 0
+
+         avg_duration = hour_entries
+           .where.not(duration: nil)
+           .average(:duration)
+           &.round(2) || 0
+
+         {
+           cache_entries: FacetCache.count,
+           expired_cache: FacetCache.where("expires_at < ?", Time.current).count,
+           error_rate: error_rate,
+           avg_duration: avg_duration
+         }
+       end
+
+       # Get database size (platform-specific)
+       def self.database_size
+         adapter_name = ActiveRecord::Base.connection.adapter_name.downcase
+
+         case adapter_name
+         when "sqlite"
+           sqlite_database_size
+         when "postgresql"
+           postgresql_database_size
+         when "mysql2", "trilogy", "mysql" # mysql2/trilogy adapters report "Mysql2"/"Trilogy", not "mysql"
+           mysql_database_size
+         else
+           "Unknown"
+         end
+       rescue => e
+         Rails.logger.error "HealthService: Error getting database size: #{e.message}"
+         "Error"
+       end
+
+       private
+
+       def self.sqlite_database_size
+         db_path = ActiveRecord::Base.connection_db_config.database
+         return "Unknown" unless File.exist?(db_path)
+
+         size_bytes = File.size(db_path)
+         format_bytes(size_bytes)
+       end
+
+       def self.postgresql_database_size
+         db_name = ActiveRecord::Base.connection_db_config.database
+         result = ActiveRecord::Base.connection.execute(
+           "SELECT pg_database_size('#{db_name}')"
+         )
+         size_bytes = result.first["pg_database_size"].to_i
+         format_bytes(size_bytes)
+       end
+
+       def self.mysql_database_size
+         db_name = ActiveRecord::Base.connection_db_config.database
+         result = ActiveRecord::Base.connection.execute(
+           "SELECT SUM(data_length + index_length) as size
+            FROM information_schema.TABLES
+            WHERE table_schema = '#{db_name}'"
+         )
+         size_bytes = result.first[0].to_i
+         format_bytes(size_bytes)
+       end
+
+       def self.format_bytes(bytes)
+         return "0 B" if bytes == 0
+
+         units = %w[B KB MB GB TB]
+         exp = (Math.log(bytes) / Math.log(1024)).to_i
+         exp = [exp, units.size - 1].min
+
+         "%.2f %s" % [bytes.to_f / (1024 ** exp), units[exp]]
+       end
+     end
+   end
+ end
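
Usage sketch (illustrative): with an ActiveRecord connection established to the log database, the aggregate report above could be read like this:

    # Hypothetical example.
    report = SolidLog::Core::HealthService.metrics
    report[:parsing][:health_status] # => "healthy", "degraded", "warning" or "critical"
    report[:storage][:database_size] # => e.g. "12.34 MB" (via format_bytes)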
data/lib/solid_log/core/services/retention_service.rb
@@ -0,0 +1,72 @@
+ module SolidLog
+   module Core
+     class RetentionService
+       # Cleanup old entries based on retention policies
+       def self.cleanup(retention_days:, error_retention_days:)
+         stats = {
+           entries_deleted: 0,
+           raw_deleted: 0,
+           cache_cleared: 0
+         }
+
+         # Calculate retention thresholds
+         regular_threshold = retention_days.days.ago
+         error_threshold = error_retention_days.days.ago
+
+         # Delete old regular logs (not errors)
+         stats[:entries_deleted] = Entry
+           .where("timestamp < ?", regular_threshold)
+           .where.not(level: %w[error fatal])
+           .delete_all
+
+         # Delete old error logs
+         stats[:entries_deleted] += Entry
+           .where("timestamp < ?", error_threshold)
+           .where(level: %w[error fatal])
+           .delete_all
+
+         # Delete corresponding raw entries (keep unparsed for investigation)
+         raw_ids = Entry.pluck(:raw_id).compact
+         stats[:raw_deleted] = RawEntry
+           .parsed
+           .where.not(id: raw_ids)
+           .delete_all
+
+         # Clear old cache entries
+         stats[:cache_cleared] = FacetCache
+           .where("expires_at < ?", Time.current)
+           .delete_all
+
+         stats
+       end
+
+       # Vacuum database (SQLite only)
+       def self.vacuum_database
+         return false unless sqlite_database?
+
+         ActiveRecord::Base.connection.execute("VACUUM")
+         true
+       rescue => e
+         Rails.logger.error "RetentionService: VACUUM failed: #{e.message}"
+         false
+       end
+
+       # Optimize database (SQLite PRAGMA optimize)
+       def self.optimize_database
+         return false unless sqlite_database?
+
+         ActiveRecord::Base.connection.execute("PRAGMA optimize")
+         true
+       rescue => e
+         Rails.logger.error "RetentionService: PRAGMA optimize failed: #{e.message}"
+         false
+       end
+
+       private
+
+       def self.sqlite_database?
+         ActiveRecord::Base.connection.adapter_name.downcase == "sqlite"
+       end
+     end
+   end
+ end
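
Usage sketch (illustrative): wiring cleanup to the Configuration defaults shown earlier; scheduling (cron, a recurring job, etc.) is left to the host app:

    # Hypothetical example.
    config = SolidLog::Core::Configuration.new
    stats = SolidLog::Core::RetentionService.cleanup(
      retention_days: config.retention_days,             # default 30
      error_retention_days: config.error_retention_days  # default 90
    )
    stats # => { entries_deleted: ..., raw_deleted: ..., cache_cleared: ... }
    SolidLog::Core::RetentionService.vacuum_database     # SQLite only; returns false otherwise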