lex-telemetry 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 041ec72d71e8c5ffef2fde576bdb413385e882f23594ba4cce1a9ce643ecef1c
4
+ data.tar.gz: 495a8e595017691e84a68fd4c1c674f740702e6935458c9cff70eda772112b4d
5
+ SHA512:
6
+ metadata.gz: d1c0a40828925c7c562d9704d9baede1ae27d84c7d16040b15be14ddd7f76310ad5973c02b8720e88e9afbd62578bfbd6b7706b4d2b3c3831f96de644f861de5
7
+ data.tar.gz: '0951709435fdc2f9c99f5b367cd504a6b1a7f13f7fedd7701bfff17277e0556654939fbb1692070f7b39422a77cef18203b98462dded097fac1318f815924c55'
@@ -0,0 +1,16 @@
1
+ name: CI
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ pull_request:
6
+
7
+ jobs:
8
+ ci:
9
+ uses: LegionIO/.github/.github/workflows/ci.yml@main
10
+
11
+ release:
12
+ needs: ci
13
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
14
+ uses: LegionIO/.github/.github/workflows/release.yml@main
15
+ secrets:
16
+ rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }}
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ /.bundle/
2
+ /vendor/bundle
3
+ /tmp/
4
+ /pkg/
5
+ *.gem
6
+ .rspec_status
7
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,67 @@
1
+ AllCops:
2
+ TargetRubyVersion: 3.4
3
+ NewCops: enable
4
+ SuggestExtensions: false
5
+
6
+ Layout/LineLength:
7
+ Max: 160
8
+
9
+ Layout/SpaceAroundEqualsInParameterDefault:
10
+ EnforcedStyle: space
11
+
12
+ Layout/HashAlignment:
13
+ EnforcedHashRocketStyle: table
14
+ EnforcedColonStyle: table
15
+
16
+ Metrics/MethodLength:
17
+ Max: 50
18
+
19
+ Metrics/ClassLength:
20
+ Max: 1500
21
+
22
+ Metrics/ModuleLength:
23
+ Max: 1500
24
+
25
+ Metrics/BlockLength:
26
+ Max: 40
27
+ Exclude:
28
+ - 'spec/**/*'
29
+
30
+ Metrics/AbcSize:
31
+ Max: 60
32
+
33
+ Metrics/CyclomaticComplexity:
34
+ Max: 15
35
+
36
+ Metrics/PerceivedComplexity:
37
+ Max: 17
38
+
39
+ Style/Documentation:
40
+ Enabled: false
41
+
42
+ Style/SymbolArray:
43
+ Enabled: true
44
+
45
+ Style/FrozenStringLiteralComment:
46
+ Enabled: true
47
+ EnforcedStyle: always
48
+
49
+ Naming/FileName:
50
+ Enabled: false
51
+
52
+ Naming/PredicateMethod:
53
+ Enabled: false
54
+
55
+ Gemspec/DevelopmentDependencies:
56
+ Enabled: false
57
+
58
+ Metrics/ParameterLists:
59
+ Enabled: false
60
+
61
+ Lint/EmptyClass:
62
+ Exclude:
63
+ - 'spec/**/*'
64
+
65
+ Style/MultilineIfModifier:
66
+ Exclude:
67
+ - 'spec/**/*'
data/CHANGELOG.md ADDED
@@ -0,0 +1,24 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [0.1.1] - 2026-03-17
6
+
7
+ ### Changed
8
+ - Renamed `module Actors` to `module Actor` (singular) in collector and publisher actors
9
+ - Updated specs to reference `Actor::Collector` and `Actor::Publisher` accordingly
10
+
11
+ ## [0.1.0] - 2026-03-15
12
+
13
+ ### Added
14
+ - TelemetryEvent normalized event shape with 5 event types (tool_call, llm_request, error, session_start, session_end)
15
+ - Scrubber with whitelist per tool, 3 levels (minimal/standard/paranoid), PII regex patterns
16
+ - Claude Code JSONL parser with incremental byte-offset reads
17
+ - Parser base interface for future tool adapters (Codex, legion-chat)
18
+ - EventStore: in-memory buffer (10k cap) with pending queue for AMQP
19
+ - Stats: session_summary and aggregate_stats computation
20
+ - Runner: ingest_session, session_stats, aggregate_stats, telemetry_status, publish_pending, collect
21
+ - Publisher actor (Every 60s): flushes pending events to telemetry.sessions AMQP exchange
22
+ - Collector actor (Every 300s): scans ~/.claude/projects for session JSONL files
23
+ - HighWaterMark: per-file byte offset tracking for incremental ingestion
24
+ - AMQP transport: telemetry.sessions exchange (topic), telemetry.sessions.process queue (durable)
data/CLAUDE.md ADDED
@@ -0,0 +1,117 @@
1
+ # lex-telemetry: Session Log Analytics for LegionIO
2
+
3
+ **Repository Level 3 Documentation**
4
+ - **Parent**: `/Users/miverso2/rubymine/legion/extensions-core/CLAUDE.md`
5
+ - **Grandparent**: `/Users/miverso2/rubymine/legion/CLAUDE.md`
6
+
7
+ ## Purpose
8
+
9
+ Core Legion Extension that ingests AI tool session logs (Claude Code JSONL), normalizes events into a common TelemetryEvent shape, scrubs sensitive content, buffers in memory, computes local stats, and publishes operational telemetry to AMQP for central service consumption.
10
+
11
+ **GitHub**: https://github.com/LegionIO/lex-telemetry
12
+ **License**: MIT
13
+ **Version**: 0.1.1
14
+
15
+ ## Architecture
16
+
17
+ ```
18
+ Legion::Extensions::Telemetry
19
+ ├── Helpers/
20
+ │ ├── TelemetryEvent # Event shape builder with 5-type validation
21
+ │ ├── Scrubber # Whitelist-based scrubbing (3 levels) + PII regex
22
+ │ ├── EventStore # In-memory buffer (10k cap) + pending queue for AMQP
23
+ │ ├── Stats # session_summary + aggregate_stats computation
24
+ │ └── HighWaterMark # Per-file byte offset tracking for incremental ingestion
25
+ ├── Parsers/
26
+ │ ├── Base # Parser interface (source_name, can_parse?, parse)
27
+ │ └── ClaudeCode # Claude Code JSONL parser with incremental byte-offset reads
28
+ ├── Runners/
29
+ │ └── Telemetry # ingest_session, session_stats, aggregate_stats, telemetry_status,
30
+ │ # publish_pending, collect
31
+ ├── Actor/
32
+ │ ├── Publisher # Every 60s: flushes pending events to AMQP
33
+ │ └── Collector # Every 300s: scans session directories for new JSONL files
34
+ └── Transport/
35
+ ├── Exchanges/Sessions # telemetry.sessions topic exchange
36
+ ├── Queues/SessionsProcess # telemetry.sessions.process durable queue
37
+ └── Messages/TelemetryMessage # Routing key: telemetry.{source}.{event_type}
38
+ ```
39
+
40
+ ## TelemetryEvent Shape
41
+
42
+ ```ruby
43
+ {
44
+ event_type: :tool_call, # :tool_call, :llm_request, :error, :session_start, :session_end
45
+ session_id: "uuid",
46
+ source: :claude_code, # :claude_code, :codex, :legion_chat, etc.
47
+ timestamp: Time,
48
+ tool_name: "Read", # nil for non-tool events
49
+ tool_input: { file_path: "/path/to/file.rb" }, # scrubbed
50
+ duration_ms: 1000, # computed from tool_use -> tool_result gap
51
+ tokens: { input: 100, output: 50, cache_read: 200, cache_write: 0 },
52
+ error: nil,
53
+ metadata: {}
54
+ }
55
+ ```
56
+
57
+ ## Scrub Levels
58
+
59
+ | Level | Behavior |
60
+ |-------|----------|
61
+ | `:minimal` | PII regex only, keeps all tool inputs |
62
+ | `:standard` (default) | Whitelist per tool + PII regex |
63
+ | `:paranoid` | Strips everything except event_type, tool_name, timestamp, tokens, session_id, source |
64
+
65
+ ## Tool Whitelist (Standard Level)
66
+
67
+ | Tool | Allowed Keys |
68
+ |------|-------------|
69
+ | Read | file_path, offset, limit |
70
+ | Write | file_path |
71
+ | Edit | file_path |
72
+ | Glob | pattern, path |
73
+ | Grep | pattern, path, include |
74
+ | Bash | description, timeout |
75
+ | Agent | description, subagent_type |
76
+
77
+ Unknown tools: all input stripped at standard level.
78
+
79
+ ## Runner Methods
80
+
81
+ | Method | Purpose |
82
+ |--------|---------|
83
+ | `ingest_session(file_path:, scrub_level:)` | Parse + scrub + store, returns summary |
84
+ | `session_stats(session_id:)` | Per-session breakdown (tool counts, tokens, files, errors) |
85
+ | `aggregate_stats` | Cross-session totals (frequencies, error rate, most-read files) |
86
+ | `telemetry_status` | Buffer size, pending count, session count, parser list |
87
+ | `publish_pending` | Flush pending events to AMQP (called by Publisher actor) |
88
+ | `collect` | Scan SCAN_DIRS for new session files (called by Collector actor) |
89
+
90
+ ## Actors
91
+
92
+ | Actor | Interval | Runner Method |
93
+ |-------|----------|--------------|
94
+ | Publisher | 60s | publish_pending |
95
+ | Collector | 300s | collect |
96
+
97
+ ## Integration Points
98
+
99
+ - **legion-transport** (optional): AMQP exchange/queue/message for telemetry publishing. If unavailable, events buffer in pending queue.
100
+ - **LegionIO CLI**: `legion telemetry stats/ingest/status` subcommands
101
+ - **lex-privatecore**: Scrubber carries own PII regex patterns (no hard dependency)
102
+
103
+ ## Design Doc
104
+
105
+ `docs/work/completed/2026-03-15-session-log-analytics-design.md`
106
+
107
+ ## Development
108
+
109
+ ```bash
110
+ bundle install
111
+ bundle exec rspec # 60 specs
112
+ bundle exec rubocop # 0 offenses
113
+ ```
114
+
115
+ ---
116
+
117
+ **Maintained By**: Matthew Iverson (@Esity)
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LegionIO
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # lex-telemetry
2
+
3
+ Session log analytics pipeline for LegionIO. Ingests AI tool session logs (Claude Code JSONL),
4
+ normalizes events, scrubs sensitive content, computes local stats, and publishes operational
5
+ telemetry to AMQP for central consumption.
6
+
7
+ ## Features
8
+
9
+ - **Parsers**: Claude Code JSONL (extensible to Codex, legion-chat)
10
+ - **Scrubber**: Whitelist per tool, 3 levels (minimal/standard/paranoid), PII detection
11
+ - **EventStore**: In-memory buffer (10k cap) with pending queue
12
+ - **Stats**: Per-session and cross-session analytics
13
+ - **AMQP Publishing**: Telemetry events to `telemetry.sessions` exchange
14
+ - **Collector**: Auto-discovers session files in `~/.claude/projects/`
15
+
16
+ ## CLI
17
+
18
+ ```bash
19
+ legion telemetry stats # Aggregate stats across sessions
20
+ legion telemetry stats <session_id> # Per-session breakdown
21
+ legion telemetry ingest <path> # Manual file ingestion
22
+ legion telemetry status # Buffer health and publisher state
23
+ ```
24
+
25
+ ## Installation
26
+
27
+ Add to your Gemfile:
28
+
29
+ ```ruby
30
+ gem 'lex-telemetry'
31
+ ```
32
+
33
+ ## Development
34
+
35
+ ```bash
36
+ bundle install
37
+ bundle exec rspec # 60 specs
38
+ bundle exec rubocop # 0 offenses
39
+ ```
40
+
41
+ ## License
42
+
43
+ MIT
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'legion/extensions/telemetry/version'
6
+
7
# Gem specification for lex-telemetry. The version is read from
# Legion::Extensions::Telemetry::VERSION, which the require above loads.
Gem::Specification.new do |spec|
  spec.name = 'lex-telemetry'
  spec.version = Legion::Extensions::Telemetry::VERSION
  spec.authors = ['Esity']
  spec.email = ['matthewdiverson@gmail.com']

  spec.summary = 'Legion::Extensions::Telemetry'
  spec.description = 'Session log analytics pipeline: ingestion, normalization, scrubbing, stats, and AMQP telemetry publishing'
  spec.homepage = 'https://github.com/LegionIO/lex-telemetry'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 3.4'

  # Registry metadata links; all of them point at the GitHub repository.
  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = 'https://github.com/LegionIO/lex-telemetry'
  spec.metadata['changelog_uri'] = 'https://github.com/LegionIO/lex-telemetry/blob/main/CHANGELOG.md'
  spec.metadata['documentation_uri'] = 'https://github.com/LegionIO/lex-telemetry'
  spec.metadata['bug_tracker_uri'] = 'https://github.com/LegionIO/lex-telemetry/issues'
  spec.metadata['rubygems_mfa_required'] = 'true'

  # Package every git-tracked file except those under test/, spec/ or features/.
  # This shells out to `git ls-files`, so it is evaluated relative to this
  # gemspec's directory and depends on git being available.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.require_paths = ['lib']

  # Development-only dependencies; no runtime dependencies are declared here.
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'rubocop'
  spec.add_development_dependency 'rubocop-rspec'
  spec.add_development_dependency 'simplecov'
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Actor
        # Periodic actor bound to the telemetry runner's `collect` function.
        # NOTE(review): the meaning of the boolean hooks below is defined by
        # Legion::Extensions::Actors::Every, which is not visible here.
        class Collector < Legion::Extensions::Actors::Every
          # Name of the runner module, returned as a String.
          def runner_class = 'Legion::Extensions::Telemetry::Runners::Telemetry'

          # Name of the runner function this actor is bound to.
          def runner_function = 'collect'

          # Interval value handed to the base actor (300; the changelog
          # describes this actor as running every 300s).
          def time = 300

          def run_now? = false

          def use_runner? = false

          def check_subtask? = false

          def generate_task? = false
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Actor
        # Periodic actor bound to the telemetry runner's `publish_pending` function.
        # NOTE(review): the meaning of the boolean hooks below is defined by
        # Legion::Extensions::Actors::Every, which is not visible here.
        class Publisher < Legion::Extensions::Actors::Every
          # Name of the runner module, returned as a String.
          def runner_class = 'Legion::Extensions::Telemetry::Runners::Telemetry'

          # Name of the runner function this actor is bound to.
          def runner_function = 'publish_pending'

          # Interval value handed to the base actor (60; the changelog
          # describes this actor as running every 60s).
          def time = 60

          def run_now? = false

          def use_runner? = false

          def check_subtask? = false

          def generate_task? = false
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Helpers
        # In-memory holding area for telemetry events.
        #
        # * `events`   - rolling buffer, limited to MAX_BUFFER entries (oldest dropped first)
        # * `pending`  - events stored since the last `flush_pending`
        # * `sessions` - per-session bookkeeping keyed by session id
        class EventStore
          MAX_BUFFER = 10_000

          attr_reader :events, :pending, :sessions

          def initialize
            @events = []
            @pending = []
            @sessions = {}
          end

          # Appends an event to both the rolling buffer and the pending queue and
          # updates the first_seen / event_count / last_seen record of its session.
          #
          # @param event [Hash] event hash; :session_id and :timestamp are read
          # @return the event's :timestamp value
          def store(event:)
            # Make room first so the buffer never holds more than MAX_BUFFER entries.
            @events.shift unless @events.length < MAX_BUFFER
            @events.push(event)
            @pending.push(event)

            seen_at = event[:timestamp]
            record = (@sessions[event[:session_id]] ||= { first_seen: seen_at, event_count: 0 })
            record[:event_count] += 1
            record[:last_seen] = seen_at
          end

          # Empties the pending queue in place.
          #
          # @return [Array] the events that were pending, in insertion order
          def flush_pending
            @pending.shift(@pending.length)
          end

          # @param session_id the session to look up
          # @return [Array] buffered events whose :session_id equals `session_id`
          def events_for(session_id:)
            @events.filter { |candidate| candidate[:session_id] == session_id }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Helpers
        # Tracks, per path, the last recorded offset and whether the path has
        # been marked as completely ingested. State is held in memory only.
        class HighWaterMark
          def initialize
            @completed = Set.new
            @marks = {}
          end

          # @return the offset recorded for `path`, or 0 when none was recorded
          def get(path:)
            @marks.key?(path) ? @marks[path] : 0
          end

          # Records `offset` for `path` and returns the offset.
          def set(path:, offset:)
            @marks.store(path, offset)
          end

          # @return [Boolean] true once `mark_complete` has been called for `path`
          def ingested?(path:)
            @completed.member?(path)
          end

          # Flags `path` as completely ingested; returns the underlying Set.
          def mark_complete(path:)
            @completed << path
          end
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Helpers
        # Removes sensitive content from telemetry event hashes.
        #
        # Levels:
        # * :minimal  - PII patterns are replaced inside tool_input; all keys are kept
        # * :standard - tool_input is reduced to the per-tool whitelist, then PII-scrubbed
        # * :paranoid - only the keys listed in PARANOID_KEEP survive
        module Scrubber
          # Tool name => tool_input keys (symbols) that may be kept at :standard level.
          TOOL_WHITELIST = {
            'Read' => %i[file_path offset limit],
            'Write' => %i[file_path],
            'Edit' => %i[file_path],
            'Glob' => %i[pattern path],
            'Grep' => %i[pattern path include],
            'Bash' => %i[description timeout],
            'Agent' => %i[description subagent_type]
          }.freeze

          # Pattern => replacement token, applied in this order.
          # The TLD class is [A-Za-z]; a `|` inside a character class is a literal
          # pipe and would let a match run across pipe-separated text.
          PII_PATTERNS = {
            /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/ => '[EMAIL]',
            /\b\d{3}-\d{2}-\d{4}\b/ => '[SSN]',
            /\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/ => '[PHONE]'
          }.freeze

          PARANOID_KEEP = %i[event_type tool_name timestamp tokens session_id source].freeze

          module_function

          # Returns a scrubbed shallow copy of `event`; the caller's hash is not modified.
          #
          # @param event [Hash] telemetry event hash
          # @param level [Symbol, String] :minimal, :standard or :paranoid
          # @return [Hash] the scrubbed copy
          # @raise [ArgumentError] when `level` is not one of the three known levels,
          #   so a typo can never silently disable scrubbing
          def scrub(event:, level: :standard)
            # Accept 'standard' as well as :standard (e.g. values coming from a CLI).
            level = level.to_sym if level.is_a?(String)
            scrubbed = event.dup

            case level
            when :minimal
              scrub_pii(scrubbed)
            when :standard
              scrub_tool_input(scrubbed)
              scrub_pii(scrubbed)
            when :paranoid
              scrub_paranoid(scrubbed)
            else
              raise ArgumentError, "invalid scrub level: #{level.inspect}"
            end

            scrubbed
          end

          # Reduces event[:tool_input] to the keys whitelisted for event[:tool_name].
          # Input of tools without a whitelist entry is replaced by nil.
          # Does nothing when tool_input is not a Hash.
          def scrub_tool_input(event)
            return unless event[:tool_input].is_a?(Hash)

            whitelist = TOOL_WHITELIST[event[:tool_name]]
            unless whitelist
              event[:tool_input] = nil
              return
            end

            # slice builds a new hash, so the original tool_input stays untouched.
            event[:tool_input] = event[:tool_input].slice(*whitelist)
          end

          # Replaces PII matches in every value of event[:tool_input].
          # Does nothing when tool_input is not a Hash.
          def scrub_pii(event)
            return unless event[:tool_input].is_a?(Hash)

            event[:tool_input] = event[:tool_input].transform_values { |v| scrub_pii_value(v) }
          end

          # Applies PII_PATTERNS to a String and recurses into Hash values and Array
          # elements so nested strings are covered too. Other values pass through.
          # Always returns new containers/strings instead of mutating the argument.
          def scrub_pii_value(value)
            case value
            when String
              PII_PATTERNS.reduce(value) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
            when Hash
              value.transform_values { |nested| scrub_pii_value(nested) }
            when Array
              value.map { |nested| scrub_pii_value(nested) }
            else
              value
            end
          end

          # Removes, in place, every key that is not listed in PARANOID_KEEP
          # (this includes :tool_input, :error and :metadata). Returns the event.
          def scrub_paranoid(event)
            event.delete_if { |key, _| !PARANOID_KEEP.include?(key) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Helpers
        # Read-only statistics computed from an event store. The store must respond
        # to `events`, `sessions` and `events_for(session_id:)`.
        module Stats
          module_function

          # Summarizes the buffered events of one session.
          #
          # @param store [#events_for] event store
          # @param session_id session to summarize
          # @return [Hash] event/tool/error counts, token totals, unique file paths
          #   and average/total tool duration in milliseconds
          def session_summary(store:, session_id:)
            session_events = store.events_for(session_id: session_id)
            tool_calls = session_events.select { |ev| ev[:event_type] == :tool_call }
            failures = session_events.count { |ev| ev[:event_type] == :error }

            per_tool = Hash.new(0)
            tool_calls.each { |ev| per_tool[ev[:tool_name]] += 1 if ev[:tool_name] }

            timings = tool_calls.map { |ev| ev[:duration_ms] }.select(&:itself)
            total_ms = timings.sum
            average_ms =
              if timings.empty?
                0
              else
                (total_ms.to_f / timings.length).round
              end

            {
              session_id: session_id,
              event_count: session_events.length,
              tool_counts: per_tool,
              tokens: sum_tokens(session_events),
              error_count: failures,
              unique_files: extract_files(tool_calls),
              avg_duration_ms: average_ms,
              total_duration_ms: total_ms
            }
          end

          # Computes totals across every buffered event in the store.
          #
          # @param store [#events, #sessions] event store
          # @return [Hash] session/event counts, per-tool frequency, token totals,
          #   error count and rate, and the ten most frequent :file_path values
          def aggregate_stats(store:)
            all_events = store.events
            tool_frequency = Hash.new(0)
            file_frequency = Hash.new(0)

            all_events.each do |ev|
              name = ev[:tool_name]
              tool_frequency[name] += 1 if name && ev[:event_type] == :tool_call

              input = ev[:tool_input]
              next unless input.is_a?(Hash)

              file = input[:file_path]
              file_frequency[file] += 1 if file
            end

            failures = all_events.count { |ev| ev[:event_type] == :error }
            failure_rate = all_events.empty? ? 0.0 : (failures.to_f / all_events.length).round(4)

            {
              session_count: store.sessions.length,
              total_events: all_events.length,
              tool_frequency: tool_frequency,
              tokens: sum_tokens(all_events),
              error_count: failures,
              error_rate: failure_rate,
              most_read_files: file_frequency.sort_by { |_path, hits| -hits }.take(10).to_h
            }
          end

          # Adds up the :input, :output, :cache_read and :cache_write token counts of
          # all events whose :tokens value is a Hash; missing counts are treated as 0.
          def sum_tokens(events)
            totals = { input: 0, output: 0, cache_read: 0, cache_write: 0 }
            events.each do |ev|
              usage = ev[:tokens]
              next unless usage.is_a?(Hash)

              totals.each_key { |kind| totals[kind] += usage[kind] || 0 }
            end
            totals
          end

          # @return [Array] distinct, non-nil tool_input[:file_path] values in first-seen order
          def extract_files(tool_events)
            paths = tool_events.map { |ev| ev[:tool_input]&.dig(:file_path) }
            paths.select(&:itself).uniq
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Helpers
        # Factory for the normalized telemetry event hash.
        module TelemetryEvent
          VALID_EVENT_TYPES = %i[tool_call llm_request error session_start session_end].freeze

          module_function

          # Builds an event hash containing all ten keys; optional values default to nil.
          #
          # @param event_type [Symbol] must be one of VALID_EVENT_TYPES
          # @param session_id identifier of the session the event belongs to
          # @param source identifier of the tool that produced the log
          # @param timestamp time associated with the event
          # @return [Hash] the event hash
          # @raise [ArgumentError] when `event_type` is not in VALID_EVENT_TYPES
          def build(event_type:, session_id:, source:, timestamp:, tool_name: nil,
                    tool_input: nil, duration_ms: nil, tokens: nil, error: nil, metadata: nil)
            known_type = VALID_EVENT_TYPES.include?(event_type)
            raise ArgumentError, "invalid event_type: #{event_type}" unless known_type

            required = { event_type: event_type, session_id: session_id, source: source, timestamp: timestamp }
            required.merge(
              tool_name: tool_name,
              tool_input: tool_input,
              duration_ms: duration_ms,
              tokens: tokens,
              error: error,
              metadata: metadata
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Parsers
        # Interface every session-log parser is expected to implement. Each method
        # raises NotImplementedError until the including class overrides it; the
        # message names the class and the missing method.
        #
        # Note: NotImplementedError is a ScriptError, not a StandardError, so a
        # plain `rescue StandardError` does not catch it.
        module Base
          # @return identifier of the log source handled by the parser
          def source_name
            raise NotImplementedError, "#{self.class} must implement #source_name"
          end

          # @param _path path of the candidate log file
          # @return [Boolean] whether the parser can handle the file
          def can_parse?(_path)
            raise NotImplementedError, "#{self.class} must implement #can_parse?"
          end

          # @param _path path of the log file
          # @param offset position to start reading from (defaults to 0)
          # @yield each parsed event
          def parse(_path, offset: 0, &_block)
            raise NotImplementedError, "#{self.class} must implement #parse"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
require 'json'
require 'securerandom'
require 'time'
5
+
6
module Legion
  module Extensions
    module Telemetry
      module Parsers
        # Parser for Claude Code session logs (one JSON document per line).
        #
        # Events are built with Helpers::TelemetryEvent and yielded to the caller:
        # * :session_start - for the first decodable line when reading from offset 0
        # * :llm_request   - for assistant messages that carry usage but no tool_use
        # * :tool_call     - once a tool_use has been matched with its tool_result
        #
        # tool_use entries still waiting for their result are remembered per path
        # on the parser instance, so a result that only shows up in a later
        # incremental read is still matched.
        class ClaudeCode
          include Base

          def source_name
            :claude_code
          end

          # A file is accepted when its first line is a JSON object containing a
          # 'sessionId' or 'parentUuid' key. Any error (missing or empty file,
          # invalid JSON, ...) yields false.
          def can_parse?(path)
            first_line = File.open(path, &:readline)
            data = ::JSON.parse(first_line)
            data.is_a?(Hash) && (data.key?('sessionId') || data.key?('parentUuid'))
          rescue StandardError
            false
          end

          # Reads `path` starting at byte `offset` and yields one event per
          # recognized entry.
          #
          # @param path [String] session log file
          # @param offset [Integer] byte position to start from; 0 means a fresh read
          # @yield [Hash] normalized telemetry event
          # @return [Integer] byte offset just behind the last consumed line; pass it
          #   as `offset` on the next call to continue incrementally
          def parse(path, offset: 0, &block)
            pending_tools = pending_tools_for(path, offset)
            session_id = nil

            File.open(path) do |f|
              f.seek(offset) if offset.positive?
              first_line = offset.zero?
              consumed = f.pos

              f.each_line do |line|
                data = decode_line(line)
                # An undecodable line without a trailing newline is most likely still
                # being written. Stop in front of it so the next read sees it whole.
                break if data.nil? && !line.end_with?("\n")

                consumed = f.pos
                next if data.nil?

                timestamp = parse_timestamp(data['timestamp'])

                if first_line
                  first_line = false
                  session_id = data['sessionId'] || data['uuid'] || SecureRandom.uuid
                  block.call(Helpers::TelemetryEvent.build(
                               event_type: :session_start,
                               session_id: session_id,
                               source: source_name,
                               timestamp: timestamp,
                               metadata: { version: data['version'] }
                             ))
                  next
                end

                session_id ||= data['sessionId'] || 'unknown'
                msg = data['message']
                next unless msg.is_a?(Hash)

                case msg['role']
                when 'assistant' then process_assistant(msg, session_id, timestamp, pending_tools, &block)
                when 'user' then process_tool_result(msg, session_id, timestamp, pending_tools, &block)
                end
              end

              consumed
            end
          end

          private

          # Returns the map of unmatched tool_use entries for `path`. A read from
          # offset 0 starts over with an empty map; incremental reads reuse it.
          def pending_tools_for(path, offset)
            @pending_tools ||= {}
            @pending_tools[path] = {} if offset.zero?
            @pending_tools[path] ||= {}
          end

          # Decodes one log line. Returns the Hash, or nil when the line is not valid
          # JSON or does not contain a JSON object.
          def decode_line(line)
            data = ::JSON.parse(line.strip)
            data.is_a?(Hash) ? data : nil
          rescue ::JSON::ParserError
            nil
          end

          # Remembers every tool_use item of an assistant message until its result
          # arrives. Messages with usage and no tool_use are emitted as :llm_request.
          def process_assistant(msg, session_id, timestamp, pending_tools, &block)
            content = msg['content']
            usage = msg['usage']

            tool_uses =
              if content.is_a?(Array)
                content.select { |item| item.is_a?(Hash) && item['type'] == 'tool_use' }
              else
                []
              end

            tool_uses.each do |item|
              pending_tools[item['id']] = {
                tool_name: item['name'],
                tool_input: symbolize_keys(item['input']),
                timestamp: timestamp,
                tokens: extract_tokens(usage)
              }
            end

            return unless usage
            return if tool_uses.any?

            block.call(Helpers::TelemetryEvent.build(
                         event_type: :llm_request,
                         session_id: session_id,
                         source: source_name,
                         timestamp: timestamp,
                         tokens: extract_tokens(usage)
                       ))
          end

          # Emits a :tool_call event for every tool_result item that matches a
          # remembered tool_use. The event carries the tool_use timestamp; the
          # duration is the gap between tool_use and tool_result when both
          # timestamps are known, otherwise nil.
          def process_tool_result(msg, session_id, timestamp, pending_tools, &block)
            content = msg['content']
            return unless content.is_a?(Array)

            content.each do |item|
              next unless item.is_a?(Hash) && item['type'] == 'tool_result'

              pending = pending_tools.delete(item['tool_use_id'])
              next unless pending

              duration_ms = ((timestamp - pending[:timestamp]) * 1000).to_i if timestamp && pending[:timestamp]

              block.call(Helpers::TelemetryEvent.build(
                           event_type: :tool_call,
                           session_id: session_id,
                           source: source_name,
                           timestamp: pending[:timestamp],
                           tool_name: pending[:tool_name],
                           tool_input: pending[:tool_input],
                           duration_ms: duration_ms,
                           tokens: pending[:tokens]
                         ))
            end
          end

          # Maps the usage hash of a message to the event token shape; counts that
          # are absent become 0. Returns nil when `usage` is not a Hash.
          def extract_tokens(usage)
            return nil unless usage.is_a?(Hash)

            {
              input: usage['input_tokens'] || 0,
              output: usage['output_tokens'] || 0,
              cache_read: usage['cache_read_input_tokens'] || 0,
              cache_write: usage['cache_creation_input_tokens'] || 0
            }
          end

          # Returns a Time for a parseable timestamp string, otherwise nil.
          def parse_timestamp(str)
            return nil unless str.is_a?(String)

            Time.parse(str)
          rescue ArgumentError
            nil
          end

          # Returns a copy of `hash` with top-level keys converted to symbols,
          # or nil when the argument is not a Hash.
          def symbolize_keys(hash)
            return nil unless hash.is_a?(Hash)

            hash.transform_keys(&:to_sym)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Runners
        # Runner functions of the telemetry extension: ingestion, statistics,
        # status, publishing and directory collection. Every function returns a
        # Hash with a :success flag.
        module Telemetry
          module_function

          # Directories scanned by `collect`; expanded once when this file is loaded.
          SCAN_DIRS = [
            File.expand_path('~/.claude/projects')
          ].freeze

          # Lazily created, memoized event store.
          def event_store
            @event_store ||= Helpers::EventStore.new
          end

          # Lazily created, memoized list of parser instances.
          def parsers
            @parsers ||= [Parsers::ClaudeCode.new]
          end

          # Parses `file_path` from the beginning, scrubs every event and stores it.
          #
          # @param file_path [String] session log to ingest
          # @param scrub_level [Symbol] level handed to Helpers::Scrubber.scrub
          # @return [Hash] { success: true, session_id:, event_count:, file_path: } or
          #   { success: false, error: } when no parser matches or an error is raised
          def ingest_session(file_path:, scrub_level: :standard, **_opts)
            parser = parsers.find { |p| p.can_parse?(file_path) }
            return { success: false, error: 'no parser found' } unless parser

            session_id = nil
            count = 0

            parser.parse(file_path) do |event|
              scrubbed = Helpers::Scrubber.scrub(event: event, level: scrub_level)
              event_store.store(event: scrubbed)
              session_id ||= scrubbed[:session_id]
              count += 1
            end

            { success: true, session_id: session_id, event_count: count, file_path: file_path }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # @return [Hash] { success: true, stats: } for a session with buffered events,
          #   otherwise { success: false, error: 'session not found' }
          def session_stats(session_id:, **_opts)
            events = event_store.events_for(session_id: session_id)
            return { success: false, error: 'session not found' } if events.empty?

            stats = Helpers::Stats.session_summary(store: event_store, session_id: session_id)
            { success: true, stats: stats }
          end

          # @return [Hash] { success: true, stats: } with totals over all buffered events
          def aggregate_stats(**_opts)
            stats = Helpers::Stats.aggregate_stats(store: event_store)
            { success: true, stats: stats }
          end

          # @return [Hash] buffer size, pending count, session count and parser source names
          def telemetry_status(**_opts)
            {
              success: true,
              buffer_size: event_store.events.length,
              pending_count: event_store.pending.length,
              session_count: event_store.sessions.length,
              parsers: parsers.map(&:source_name)
            }
          end

          # Publishes all pending events through Transport::Messages::TelemetryMessage.
          #
          # When that message class is not loaded, nothing is flushed: the events stay
          # in the pending queue so they can be published once the transport becomes
          # available. Events whose publish call raises are put back into the queue.
          # Note that the pending queue has no size limit, so it keeps growing for as
          # long as the transport stays unavailable.
          #
          # @return [Hash] { success: true, published:, remaining: } (`remaining` is
          #   omitted when there was nothing pending) or { success: false, error: }
          def publish_pending(**_opts)
            return { success: true, published: 0 } if event_store.pending.empty?

            # Check for the transport BEFORE draining the queue; flushing first
            # would throw every pending event away when the class is missing.
            unless defined?(Legion::Extensions::Telemetry::Transport::Messages::TelemetryMessage)
              return { success: true, published: 0, remaining: event_store.pending.length }
            end

            published = 0
            event_store.flush_pending.each do |event|
              routing_key = "telemetry.#{event[:source]}.#{event[:event_type]}"
              Transport::Messages::TelemetryMessage.new.publish(event, routing_key: routing_key)
              published += 1
            rescue StandardError
              # Best effort: keep the event for the next publish attempt.
              event_store.pending.push(event)
            end

            { success: true, published: published, remaining: event_store.pending.length }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Lazily created, memoized per-file offset tracker.
          def high_water_mark
            @high_water_mark ||= Helpers::HighWaterMark.new
          end

          # Scans SCAN_DIRS recursively for *.jsonl files and ingests whatever was
          # appended since the offset recorded for each file. Events are scrubbed at
          # the :standard level. A file that raises is skipped without updating its
          # offset, so it is retried on the next run.
          #
          # @return [Hash] { success: true, files_processed:, events_ingested: } or
          #   { success: false, error: }
          def collect(**_opts)
            files_processed = 0
            events_ingested = 0

            SCAN_DIRS.each do |dir|
              next unless Dir.exist?(dir)

              Dir.glob(File.join(dir, '**', '*.jsonl')).each do |path|
                next if high_water_mark.ingested?(path: path)

                current_size = File.size(path)
                last_offset = high_water_mark.get(path: path)
                # Nothing new has been appended since the last read.
                next if current_size <= last_offset

                parser = parsers.find { |p| p.can_parse?(path) }
                next unless parser

                count = 0
                new_offset = parser.parse(path, offset: last_offset) do |event|
                  scrubbed = Helpers::Scrubber.scrub(event: event, level: :standard)
                  event_store.store(event: scrubbed)
                  count += 1
                end

                high_water_mark.set(path: path, offset: new_offset)
                files_processed += 1
                events_ingested += count
              rescue StandardError
                # Best effort per file: one unreadable file must not stop the scan.
                next
              end
            end

            { success: true, files_processed: files_processed, events_ingested: events_ingested }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Drops the memoized store, parsers and offset tracker; they are rebuilt
          # on next use.
          def reset!
            @event_store = nil
            @parsers = nil
            @high_water_mark = nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Transport
        module Exchanges
          # Exchange definition for telemetry session events.
          class Sessions < Legion::Transport::Exchange
            # Name of the exchange.
            def exchange_name
              'telemetry.sessions'
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Transport
        module Messages
          # Message definition used to publish telemetry events to the Sessions exchange.
          class TelemetryMessage < Legion::Transport::Message
            # NOTE(review): this returns a fixed key, while the telemetry runner
            # passes `routing_key:` to #publish. Confirm against
            # Legion::Transport::Message which of the two takes effect.
            def routing_key
              'telemetry.sessions.process'
            end

            # Exchange class the message is published to.
            def exchange
              Legion::Extensions::Telemetry::Transport::Exchanges::Sessions
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      module Transport
        module Queues
          # Queue definition for processing telemetry session events.
          class SessionsProcess < Legion::Transport::Queue
            # Name of the queue.
            def queue_name
              'telemetry.sessions.process'
            end

            # Options for the queue declaration; only auto_delete is set here.
            def queue_options
              { auto_delete: false }
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Telemetry
      # Gem version; the gemspec reads this constant for spec.version.
      VERSION = '0.1.1'
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/telemetry/version'
4
+ require 'legion/extensions/telemetry/helpers/telemetry_event'
5
+ require 'legion/extensions/telemetry/helpers/scrubber'
6
+ require 'legion/extensions/telemetry/parsers/base'
7
+ require 'legion/extensions/telemetry/parsers/claude_code'
8
+ require 'legion/extensions/telemetry/helpers/event_store'
9
+ require 'legion/extensions/telemetry/helpers/stats'
10
+ require 'legion/extensions/telemetry/helpers/high_water_mark'
11
+ require 'legion/extensions/telemetry/runners/telemetry'
12
+
13
module Legion
  module Extensions
    # Entry point of the telemetry extension.
    module Telemetry
      # Only hook into the extension framework when Legion::Extensions::Core has
      # been loaded, so this file can also be required without it.
      if Legion::Extensions.const_defined?(:Core)
        extend Legion::Extensions::Core
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lex-telemetry
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Esity
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rake
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rspec
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rubocop
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rubocop-rspec
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: simplecov
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ description: 'Session log analytics pipeline: ingestion, normalization, scrubbing,
83
+ stats, and AMQP telemetry publishing'
84
+ email:
85
+ - matthewdiverson@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".github/workflows/ci.yml"
91
+ - ".gitignore"
92
+ - ".rspec"
93
+ - ".rubocop.yml"
94
+ - CHANGELOG.md
95
+ - CLAUDE.md
96
+ - Gemfile
97
+ - LICENSE
98
+ - README.md
99
+ - lex-telemetry.gemspec
100
+ - lib/legion/extensions/telemetry.rb
101
+ - lib/legion/extensions/telemetry/actors/collector.rb
102
+ - lib/legion/extensions/telemetry/actors/publisher.rb
103
+ - lib/legion/extensions/telemetry/helpers/event_store.rb
104
+ - lib/legion/extensions/telemetry/helpers/high_water_mark.rb
105
+ - lib/legion/extensions/telemetry/helpers/scrubber.rb
106
+ - lib/legion/extensions/telemetry/helpers/stats.rb
107
+ - lib/legion/extensions/telemetry/helpers/telemetry_event.rb
108
+ - lib/legion/extensions/telemetry/parsers/base.rb
109
+ - lib/legion/extensions/telemetry/parsers/claude_code.rb
110
+ - lib/legion/extensions/telemetry/runners/telemetry.rb
111
+ - lib/legion/extensions/telemetry/transport/exchanges/sessions.rb
112
+ - lib/legion/extensions/telemetry/transport/messages/telemetry_message.rb
113
+ - lib/legion/extensions/telemetry/transport/queues/sessions_process.rb
114
+ - lib/legion/extensions/telemetry/version.rb
115
+ homepage: https://github.com/LegionIO/lex-telemetry
116
+ licenses:
117
+ - MIT
118
+ metadata:
119
+ homepage_uri: https://github.com/LegionIO/lex-telemetry
120
+ source_code_uri: https://github.com/LegionIO/lex-telemetry
121
+ changelog_uri: https://github.com/LegionIO/lex-telemetry/blob/main/CHANGELOG.md
122
+ documentation_uri: https://github.com/LegionIO/lex-telemetry
123
+ bug_tracker_uri: https://github.com/LegionIO/lex-telemetry/issues
124
+ rubygems_mfa_required: 'true'
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '3.4'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubygems_version: 3.6.9
140
+ specification_version: 4
141
+ summary: Legion::Extensions::Telemetry
142
+ test_files: []