trainspotter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +103 -0
  4. data/Rakefile +11 -0
  5. data/app/controllers/trainspotter/application_controller.rb +11 -0
  6. data/app/controllers/trainspotter/requests_controller.rb +46 -0
  7. data/app/controllers/trainspotter/sessions_controller.rb +30 -0
  8. data/app/engine_assets/javascripts/application.js +7 -0
  9. data/app/engine_assets/javascripts/controllers/requests_controller.js +67 -0
  10. data/app/engine_assets/javascripts/controllers/sessions_controller.js +43 -0
  11. data/app/engine_assets/stylesheets/application.css +549 -0
  12. data/app/helpers/trainspotter/ansi_to_html.rb +72 -0
  13. data/app/helpers/trainspotter/application_helper.rb +9 -0
  14. data/app/jobs/trainspotter/ingest/line.rb +44 -0
  15. data/app/jobs/trainspotter/ingest/params_parser.rb +36 -0
  16. data/app/jobs/trainspotter/ingest/parser.rb +194 -0
  17. data/app/jobs/trainspotter/ingest/processor.rb +70 -0
  18. data/app/jobs/trainspotter/ingest/reader.rb +84 -0
  19. data/app/jobs/trainspotter/ingest/session_builder.rb +52 -0
  20. data/app/jobs/trainspotter/ingest_job.rb +10 -0
  21. data/app/models/trainspotter/file_position_record.rb +17 -0
  22. data/app/models/trainspotter/record.rb +103 -0
  23. data/app/models/trainspotter/request.rb +108 -0
  24. data/app/models/trainspotter/request_record.rb +133 -0
  25. data/app/models/trainspotter/session_record.rb +71 -0
  26. data/app/views/layouts/trainspotter/application.html.erb +20 -0
  27. data/app/views/trainspotter/requests/_request.html.erb +51 -0
  28. data/app/views/trainspotter/requests/index.html.erb +49 -0
  29. data/app/views/trainspotter/sessions/_session.html.erb +28 -0
  30. data/app/views/trainspotter/sessions/index.html.erb +42 -0
  31. data/config/cucumber.yml +8 -0
  32. data/config/routes.rb +15 -0
  33. data/lib/trainspotter/background_worker.rb +74 -0
  34. data/lib/trainspotter/configuration.rb +68 -0
  35. data/lib/trainspotter/engine.rb +45 -0
  36. data/lib/trainspotter/version.rb +3 -0
  37. data/lib/trainspotter.rb +30 -0
  38. metadata +150 -0
@@ -0,0 +1,194 @@
1
module Trainspotter
  module Ingest
    # Turns raw Rails log lines into typed Line entries and groups those
    # entries into Request objects.
    #
    # Lines that carry a tagged-logger prefix ("[request-id] ...") are grouped
    # by that id. Untagged lines are grouped sequentially: a "Started ..." line
    # opens a group and the next "Completed ..." line closes it.
    class Parser
      # Pattern to extract request ID tag from tagged logger output
      # e.g., "[5de6cb4c-4a8e-4d87-bafd-3ce2281e26f4] Started GET..."
      # or "  [req-id] Post Load (0.5ms)..." (tag after leading whitespace)
      TAG_PATTERN = /^(?<leading_space>\s*)\[(?<request_id>[^\]]+)\]\s*/

      # Regex patterns for Rails log formats. Order matters: identify_entry
      # returns the first pattern that matches.
      PATTERNS = {
        # Started GET "/posts" for 127.0.0.1 at 2024-01-06 10:00:00 +0000
        # The client address may also be IPv6, e.g. "::1".
        request_start: /^Started (?<method>GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS) "(?<path>[^"]+)" for (?<ip>[\h.:]+) at (?<timestamp>.+)$/,

        # Processing by PostsController#index as HTML
        # Also handles namespaced controllers like Trainspotter::LogsController
        processing: /^Processing by (?<controller>[\w:]+)#(?<action>\w+) as (?<format>\w+|\*\/\*)/,

        # Parameters: {"session"=>{"email"=>"alice@example.com", "password"=>"[FILTERED]"}}
        params: /^\s*Parameters: (?<params_string>.+)$/,

        # Post Load (0.5ms) SELECT "posts".* FROM "posts"
        sql: /^\s*(?<name>[\w\s]+) \((?<duration>[\d.]+)ms\)\s+(?<query>.+)$/,

        # Rendered posts/index.html.erb within layouts/application (Duration: 5.0ms | GC: 0.0ms)
        render: /^\s*Rendered (?<template>[^\s]+)(?: within (?<layout>[^\s]+))? \(Duration: (?<duration>[\d.]+)ms/,

        # Completed 200 OK in 50ms (Views: 40.0ms | ActiveRecord: 5.0ms | Allocations: 1234)
        request_end: /^Completed (?<status>\d+) .+ in (?<duration>[\d.]+)ms/
      }.freeze

      def initialize
        reset_state
      end

      # Parses a single log line and files the resulting entry into the
      # matching request group.
      #
      # @param line [String] one raw log line, with or without trailing newline
      # @return [Line, nil] the parsed entry, or nil when the line is blank
      def parse_line(line)
        # Sanitize first: regex matching raises on invalid byte sequences.
        line = sanitize_encoding(line).chomp
        return nil if line.strip.empty?

        request_id, content = extract_tag(line)
        entry = identify_entry(content)

        if request_id
          handle_tagged_entry(request_id, entry)
        else
          handle_untagged_entry(entry)
        end

        entry
      end

      # Parses a file from scratch, discarding any previously collected groups.
      #
      # @param path [String] path of the log file
      # @param limit [Integer, nil] maximum number of lines to read (nil = all)
      # @return [Array<Request>] all groups, including unfinished ones
      def parse_file(path, limit: nil)
        reset_state

        File.foreach(path).with_index do |line, index|
          break if limit && index >= limit
          parse_line(line)
        end

        finalize_all_groups
        @groups
      end

      # Parses the given lines from scratch, discarding any previously
      # collected groups.
      #
      # @param lines [Enumerable<String>]
      # @return [Array<Request>] all groups, including unfinished ones
      def parse_lines(lines)
        reset_state

        lines.each { |line| parse_line(line) }

        finalize_all_groups
        @groups
      end

      # @return [Array<Request>] a copy of the groups finalized so far
      def groups
        @groups.dup
      end

      private

      def reset_state
        @groups = []
        @groups_by_id = {}
        @current_untagged_group = nil
      end

      # Splits a leading "[request-id]" tag off the line.
      # Returns [request_id, content]; request_id is nil for untagged lines.
      # Whitespace that preceded the tag is kept at the front of content.
      def extract_tag(line)
        match = line.match(TAG_PATTERN)
        return [ nil, line ] unless match

        leading_space = match[:leading_space] || ""
        [ match[:request_id], leading_space + match.post_match ]
      end

      def handle_tagged_entry(request_id, entry)
        case entry.type
        when :request_start
          @groups_by_id[request_id] = Request.new(id: request_id)
          @groups_by_id[request_id] << entry
        when :request_end
          if (group = @groups_by_id[request_id])
            group << entry
            group.completed = true
            @groups << group
            @groups_by_id.delete(request_id)
          end
        else
          # Entries for a request whose start line was never seen are dropped.
          @groups_by_id[request_id]&.<<(entry)
        end
      end

      def handle_untagged_entry(entry)
        case entry.type
        when :request_start
          # A new start closes whatever untagged request was still open.
          finalize_untagged_group
          @current_untagged_group = Request.new
          @current_untagged_group << entry
        when :request_end
          if @current_untagged_group
            @current_untagged_group << entry
            @current_untagged_group.completed = true
            finalize_untagged_group
          end
        else
          @current_untagged_group << entry if @current_untagged_group
        end
      end

      def finalize_untagged_group
        if @current_untagged_group&.entries&.any?
          @groups << @current_untagged_group
        end
        @current_untagged_group = nil
      end

      # Moves every still-open group (tagged or untagged) into @groups.
      def finalize_all_groups
        finalize_untagged_group
        @groups_by_id.each_value do |group|
          @groups << group if group.entries.any?
        end
        @groups_by_id = {}
      end

      # Returns a Line typed after the first matching pattern, or :other.
      def identify_entry(line)
        PATTERNS.each do |type, pattern|
          if (match = line.match(pattern))
            return build_entry(line, type, match)
          end
        end

        Line.new(raw: line, type: :other)
      end

      # Builds the Line for a matched pattern, converting captured strings
      # into typed metadata where the log format defines one.
      def build_entry(line, type, match)
        metadata = match.named_captures.transform_keys(&:to_sym)
        attributes = { raw: line, type: type, metadata: metadata }

        case type
        when :request_start
          attributes[:timestamp] = parse_timestamp(metadata[:timestamp])
        when :params
          metadata[:params] = ParamsParser.parse(metadata[:params_string])
        when :sql, :render
          metadata[:duration_ms] = metadata.delete(:duration).to_f
        when :request_end
          metadata[:status] = metadata[:status].to_i
          metadata[:duration_ms] = metadata.delete(:duration).to_f
        end

        Line.new(**attributes)
      end

      # Returns nil instead of raising when the timestamp cannot be parsed.
      def parse_timestamp(str)
        Time.parse(str)
      rescue ArgumentError
        nil
      end

      # Returns a valid UTF-8 version of str with invalid bytes replaced by "?".
      #
      # String#encode is a no-op when source and target encoding are both
      # UTF-8, so invalid bytes in a UTF-8 string must be removed with #scrub.
      # Binary strings are reinterpreted as UTF-8 so that valid multibyte
      # characters survive instead of being replaced byte by byte.
      def sanitize_encoding(str)
        text =
          case str.encoding
          when Encoding::UTF_8 then str
          when Encoding::BINARY then str.dup.force_encoding(Encoding::UTF_8)
          else str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "?")
          end

        text.valid_encoding? ? text : text.scrub("?")
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
module Trainspotter
  module Ingest
    # Incrementally ingests a log file: reads the lines added since the
    # position stored in FilePositionRecord, parses them into requests,
    # persists those requests and feeds completed ones to the session builder.
    class Processor < Struct.new(:log_path, :session_builder, :chunk_size, keyword_init: true)
      DEFAULT_CHUNK_SIZE = 10_000

      # Processes one chunk of every existing file in log_paths, sharing a
      # single SessionBuilder between them.
      #
      # @param log_paths [Enumerable<String>] log file paths; missing files are skipped
      # @param chunk_size [Integer] maximum number of lines read per file
      def self.call(log_paths, chunk_size: DEFAULT_CHUNK_SIZE)
        session_builder = Ingest::SessionBuilder.new

        log_paths.each do |log_path|
          next unless File.exist?(log_path)
          new(log_path:, session_builder:, chunk_size:).call
        end
      end

      # Processes the next chunk of this file, if it has unread content.
      def call
        position = unread_position
        return unless position

        process_chunk(position)
        expire_stale_sessions
      end

      private

      # Positions and records are keyed by the file's base name.
      def log_filename
        File.basename(log_path)
      end

      # Returns the offset to resume reading from, or nil when the stored
      # position already equals the file size.
      def unread_position
        position = FilePositionRecord.get_position(log_filename)
        file_size = File.size(log_path)

        # Handle log rotation: a file smaller than the stored position is
        # read again from the beginning.
        position = 0 if file_size < position
        return if file_size == position

        position
      end

      def process_chunk(position)
        new_position, lines = read_chunk(position)
        return if lines.empty?

        parse_and_persist(lines)
        FilePositionRecord.update_position(log_filename, new_position)
      end

      def parse_and_persist(lines)
        parser = Parser.new
        lines.each { |line| parser.parse_line(line) }

        parser.groups.each do |request|
          RequestRecord.upsert_from_request(log_filename, request)
          session_builder.process_request(request, log_filename) if request.completed?
        end
      end

      def expire_stale_sessions
        session_builder.expire_stale_sessions(log_filename)
      end

      # Reads up to chunk_size complete lines starting at position.
      #
      # A final line without a trailing newline is treated as still being
      # written: it is left out and the returned position is rewound to its
      # start, so the whole line is read in one piece on a later run instead
      # of being parsed as two fragments.
      #
      # @return [Array(Integer, Array<String>)] the new position and the lines
      def read_chunk(position)
        File.open(log_path, "r") do |file|
          file.seek(position)
          lines = file.each_line.first(chunk_size || DEFAULT_CHUNK_SIZE)
          new_position = file.pos

          if lines.any? && !lines.last.end_with?("\n")
            new_position -= lines.pop.bytesize
          end

          [new_position, lines]
        end
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
module Trainspotter
  module Ingest
    # Reads a log file inside Trainspotter.log_directory, either as a one-off
    # tail (read_recent) or incrementally (read_new_lines / poll_for_changes),
    # and parses it with its own Parser instance.
    class Reader
      attr_reader :path, :parser

      # @param filename [String, nil] file name inside the log directory;
      #   defaults to Trainspotter.default_log_file
      def initialize(filename = nil)
        @filename = filename || Trainspotter.default_log_file
        @path = File.join(Trainspotter.log_directory, @filename)
        @parser = Parser.new
        @file_position = 0
      end

      # Parses the tail of the file and returns the most recent groups.
      #
      # @param limit [Integer] maximum number of groups returned; the last
      #   limit * 20 lines of the file are parsed to find them
      # @return [Array] up to limit groups, oldest first
      def read_recent(limit: 100)
        return [] unless File.exist?(path)

        lines = tail_lines(limit * 20)
        parser.parse_lines(lines).last(limit)
      end

      # Returns the lines appended since the previous read and advances the
      # remembered position to the end of what was read.
      #
      # @return [Array<String>]
      def read_new_lines
        return [] unless File.exist?(path)

        current_size = File.size(path)

        # A file smaller than the remembered position is read again from
        # the beginning.
        @file_position = 0 if current_size < @file_position
        return [] if current_size == @file_position

        File.open(path, "r") do |file|
          file.seek(@file_position)
          lines = file.readlines
          @file_position = file.pos
          lines
        end
      end

      # Parses the newly appended lines and returns the groups that were
      # completed by them, each exactly once.
      #
      # @return [Array] completed groups finalized during this poll
      def poll_for_changes
        new_lines = read_new_lines
        return [] if new_lines.empty?

        # Only groups appended during this poll count as new. Checking the
        # parser's last group after every line would report one completed
        # group again for each line that follows it.
        known = parser.groups.size
        new_lines.each { |line| parser.parse_line(line) }

        parser.groups.drop(known).select(&:completed?)
      end

      private

      # Returns up to count lines from the end of the file by reading it
      # backwards in 8 KiB chunks. Also moves the remembered position to the
      # end of the file, so a following read_new_lines starts after the tail.
      def tail_lines(count)
        return [] unless File.exist?(path)

        lines = []
        File.open(path, "r") do |file|
          file.seek(0, IO::SEEK_END)
          buffer = ""
          chunk_size = 8192

          while lines.size < count && file.pos > 0
            read_size = [ chunk_size, file.pos ].min
            file.seek(-read_size, IO::SEEK_CUR)
            chunk = file.read(read_size)
            # Reading moved the position forward again; step back so the
            # next iteration continues with the preceding chunk.
            file.seek(-read_size, IO::SEEK_CUR)
            buffer = chunk + buffer
            lines = buffer.lines
          end

          @file_position = file.size
        end

        lines.last(count)
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module Trainspotter
  module Ingest
    # Assigns completed requests to sessions stored in SessionRecord, using
    # the login/logout detectors and session timeout from the configuration.
    class SessionBuilder
      # @param configuration [#session_timeout, #login_detectors, #logout_detectors]
      def initialize(configuration: Trainspotter.configuration)
        @configuration = configuration
      end

      # Attaches the request to the active session for its IP (creating one
      # when none is active), applies login/logout detection and updates the
      # session's request count and end time.
      #
      # Requests that are not completed, have no IP, or have no start time
      # are ignored: without a start time the session window cannot be
      # computed.
      #
      # @param request [Request]
      # @param log_file [String] log file name the request came from
      def process_request(request, log_file)
        return unless request.completed? && request.ip

        started_at = request.started_at
        return unless started_at

        session = find_or_create_session(request.ip, started_at, log_file)

        if (email = detect_login(request))
          session.update!(email: email)
        end

        if detect_logout(request)
          session.update!(ended_at: started_at, end_reason: "logout")
        end

        RequestRecord.where(log_request_id: request.id).update_all(session_id: session.id)
        session.increment!(:request_count)
        session.update!(ended_at: started_at) if session.ongoing?
      end

      # Expires sessions of log_file using a cutoff of now minus the
      # configured session timeout.
      def expire_stale_sessions(log_file)
        cutoff = Time.current - @configuration.session_timeout
        SessionRecord.expire_before(cutoff, log_file: log_file)
      end

      private

      def find_or_create_session(ip, timestamp, log_file)
        timeout_cutoff = timestamp - @configuration.session_timeout
        SessionRecord.find_active(ip: ip, after: timeout_cutoff, log_file: log_file) ||
          SessionRecord.create!(ip: ip, started_at: timestamp, log_file: log_file)
      end

      # Returns the first truthy value produced by a login detector, or nil.
      def detect_login(request)
        @configuration.login_detectors.each_value do |detector|
          email = detector.call(request)
          return email if email
        end
        nil
      end

      # True when any logout detector returns a truthy value for the request.
      def detect_logout(request)
        @configuration.logout_detectors.any? { |_, detector| detector.call(request) }
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Trainspotter
  # Job entry point: hands every available log file, as a path inside the
  # log directory, to the ingest processor.
  class IngestJob
    # @return whatever Trainspotter::Ingest::Processor.call returns
    def perform
      Trainspotter::Ingest::Processor.call(absolute_log_paths)
    end

    private

    # Joins each available log file name onto the configured log directory.
    def absolute_log_paths
      Trainspotter.available_log_files.map { |name| File.join(Trainspotter.log_directory, name) }
    end
  end
end
@@ -0,0 +1,17 @@
1
module Trainspotter
  # Row of the "file_positions" table, keyed by log file name, holding the
  # position recorded for that file.
  class FilePositionRecord < Record
    self.table_name = "file_positions"
    self.primary_key = "log_file"

    class << self
      # @param log_file [String] key of the row to look up
      # @return [Integer] the stored position, or 0 when there is no row
      #   (or the row has no position)
      def get_position(log_file)
        stored = find_by(log_file: log_file)
        (stored && stored.position) || 0
      end

      # Inserts or updates the row for log_file with the given position and
      # the current time as updated_at.
      def update_position(log_file, position)
        row = { log_file: log_file, position: position, updated_at: Time.current }
        upsert(row, unique_by: :log_file)
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
module Trainspotter
  # Abstract base class for Trainspotter's models. It connects to a SQLite
  # database at Trainspotter.database_path and creates the tables itself.
  # When the stored schema version differs from SCHEMA_VERSION the data
  # tables are dropped and recreated.
  class Record < ActiveRecord::Base
    self.abstract_class = true

    SCHEMA_VERSION = 4

    class << self
      # Establishes the connection and ensures the schema exists. Does
      # nothing once a previous call has succeeded.
      def ensure_connected
        unless @connected
          establish_connection(
            adapter: "sqlite3",
            database: Trainspotter.database_path,
            pool: 5,
            timeout: 5000
          )
          ensure_schema
          @connected = true
        end
      end

      # Disconnects the pool and removes it from the connection handler.
      # Errors raised while removing the pool are ignored.
      def reset_connection!
        if @connected
          connection_pool.disconnect!
          begin
            connection_handler.remove_connection_pool(name)
          rescue StandardError
            nil
          end
          @connected = false
        end
      end

      private

      # Drops the data tables when the stored version is stale, then
      # (re)creates whatever is missing.
      def ensure_schema
        drop_data_tables unless schema_version == SCHEMA_VERSION
        define_schema
      end

      def drop_data_tables
        %i[requests sessions file_positions].each do |table|
          connection.drop_table(table, if_exists: true)
        end
      end

      def define_schema
        create_requests_table
        create_sessions_table
        create_file_positions_table
        create_schema_migrations_table
        store_schema_version
      end

      def create_requests_table
        connection.create_table :requests, if_not_exists: true do |t|
          t.string :log_request_id, null: false
          t.string :log_file, null: false
          t.string :method
          t.string :path
          t.integer :status
          t.float :duration_ms
          t.string :ip
          t.string :controller
          t.string :action
          t.text :params_json
          t.datetime :started_at
          t.text :entries_json
          t.boolean :completed, default: false
          t.string :session_id
          t.datetime :created_at, default: -> { "CURRENT_TIMESTAMP" }

          t.index :log_request_id, unique: true, name: "idx_requests_log_request_id", if_not_exists: true
          t.index [:log_file, :started_at], order: { started_at: :desc }, name: "idx_requests_log_file_started_at", if_not_exists: true
          t.index [:log_file, :created_at], name: "idx_requests_log_file_created_at", if_not_exists: true
          t.index :session_id, name: "idx_requests_session_id", if_not_exists: true
        end
      end

      def create_sessions_table
        connection.create_table :sessions, id: false, if_not_exists: true do |t|
          t.string :id, null: false, primary_key: true
          t.string :ip, null: false
          t.string :email
          t.datetime :started_at
          t.datetime :ended_at
          t.string :end_reason, default: "ongoing"
          t.integer :request_count, default: 0
          t.string :log_file, null: false

          t.index [:ip, :started_at], order: { started_at: :desc }, name: "idx_sessions_ip_started_at", if_not_exists: true
          t.index :log_file, name: "idx_sessions_log_file", if_not_exists: true
        end
      end

      def create_file_positions_table
        connection.create_table :file_positions, id: false, if_not_exists: true do |t|
          t.string :log_file, null: false, primary_key: true
          t.integer :position, default: 0
          t.datetime :updated_at, default: -> { "CURRENT_TIMESTAMP" }
        end
      end

      def create_schema_migrations_table
        connection.create_table :schema_migrations, id: false, if_not_exists: true do |t|
          t.string :version, null: false
        end
      end

      # Replaces the stored version with SCHEMA_VERSION when they differ.
      def store_schema_version
        return if schema_version == SCHEMA_VERSION

        connection.execute("DELETE FROM schema_migrations")
        quoted_version = connection.quote(SCHEMA_VERSION.to_s)
        connection.execute("INSERT INTO schema_migrations (version) VALUES (#{quoted_version})")
      end

      # Highest stored version as an Integer; nil when there is no row or
      # the query fails (e.g. the table does not exist yet).
      def schema_version
        connection.select_value("SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1")&.to_i
      rescue ActiveRecord::StatementInvalid
        nil
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
module Trainspotter
  # One logged request: an ordered list of parsed log entries plus
  # convenience readers that pull values out of the first entry of each kind.
  #
  # NOTE: #method shadows Object#method on instances of this class.
  class Request
    attr_reader :id, :entries
    attr_accessor :completed

    alias completed? completed

    # @param id [String, nil] request id; a random 16-character hex id is
    #   generated when none is given
    def initialize(id: nil)
      @entries = []
      @completed = false
      @id = id || SecureRandom.hex(8)
    end

    # Appends an entry; returns the entries array.
    def <<(entry)
      @entries.push(entry)
    end

    # HTTP method from the start entry, "?" when unknown.
    def method
      metadata_value(start_entry, :method) || "?"
    end

    # Request path from the start entry, "?" when unknown.
    def path
      metadata_value(start_entry, :path) || "?"
    end

    def ip
      metadata_value(start_entry, :ip)
    end

    def controller
      metadata_value(processing_entry, :controller)
    end

    def action
      metadata_value(processing_entry, :action)
    end

    def params
      metadata_value(params_entry, :params)
    end

    def status
      metadata_value(end_entry, :status)
    end

    def duration_ms
      metadata_value(end_entry, :duration_ms)
    end

    def started_at
      entry = start_entry
      entry&.timestamp
    end

    def sql_entries
      @entries.select { |entry| entry.sql? }
    end

    def render_entries
      @entries.select { |entry| entry.render? }
    end

    def sql_count
      sql_entries.size
    end

    # Sum of the SQL entries' durations; entries without one count as 0.
    def sql_duration_ms
      total_duration(sql_entries)
    end

    def render_count
      render_entries.size
    end

    # Sum of the render entries' durations; entries without one count as 0.
    def render_duration_ms
      total_duration(render_entries)
    end

    # Label for the response status range, "unknown" outside 200..599
    # or when there is no status.
    def status_class
      case status
      in 200..299 then "success"
      in 300..399 then "redirect"
      in 400..499 then "client-error"
      in 500..599 then "server-error"
      else "unknown"
      end
    end

    private

    def start_entry
      first_entry(:request_start?)
    end

    def processing_entry
      first_entry(:processing?)
    end

    def params_entry
      first_entry(:params?)
    end

    def end_entry
      first_entry(:request_end?)
    end

    # First entry answering true to the given predicate method, or nil.
    def first_entry(predicate)
      @entries.find(&predicate)
    end

    # Nil-safe metadata lookup on an optional entry.
    def metadata_value(entry, key)
      entry&.metadata&.dig(key)
    end

    def total_duration(list)
      list.sum { |entry| entry.duration_ms || 0 }
    end
  end
end