topical 0.0.1.pre.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: topical
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.pre.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Petersen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-08-30 00:00:00.000000000 Z
11
+ date: 2025-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: clusterkit
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.1'
19
+ version: 0.2.2
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.1'
26
+ version: 0.2.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: red-candle
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '1.0'
33
+ version: '1.2'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '1.0'
40
+ version: '1.2'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.22'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.22'
83
97
  description: Extract topics from document embeddings using HDBSCAN clustering and
84
98
  c-TF-IDF term extraction. Provides automatic topic labeling, quality metrics, and
85
99
  support for various clustering algorithms.
@@ -95,30 +109,32 @@ files:
95
109
  - LICENSE.txt
96
110
  - README.md
97
111
  - Rakefile
112
+ - docs/assets/topical-wide.png
113
+ - examples/detect_new_topics.rb
98
114
  - examples/quick_demo.rb
115
+ - examples/topic_summaries_with_llm.rb
99
116
  - examples/verify_migration.rb
100
117
  - lib/topical.rb
101
118
  - lib/topical/clustering/adapter.rb
102
119
  - lib/topical/clustering/hdbscan_adapter.rb
103
120
  - lib/topical/clustering/kmeans_adapter.rb
121
+ - lib/topical/dimensionality_reducer.rb
104
122
  - lib/topical/engine.rb
105
123
  - lib/topical/extractors/term_extractor.rb
106
124
  - lib/topical/labelers/base.rb
107
- - lib/topical/labelers/hybrid.rb
108
- - lib/topical/labelers/llm_adapter.rb
109
- - lib/topical/labelers/llm_based.rb
110
125
  - lib/topical/labelers/term_based.rb
111
126
  - lib/topical/metrics.rb
127
+ - lib/topical/model_serializer.rb
112
128
  - lib/topical/topic.rb
113
129
  - lib/topical/version.rb
114
130
  - sig/topical.rbs
115
- homepage: https://github.com/cpetersen/topical
131
+ homepage: https://github.com/scientist-labs/topical
116
132
  licenses:
117
133
  - MIT
118
134
  metadata:
119
- homepage_uri: https://github.com/cpetersen/topical
120
- source_code_uri: https://github.com/cpetersen/topical
121
- changelog_uri: https://github.com/cpetersen/topical/blob/main/CHANGELOG.md
135
+ homepage_uri: https://github.com/scientist-labs/topical
136
+ source_code_uri: https://github.com/scientist-labs/topical
137
+ changelog_uri: https://github.com/scientist-labs/topical/blob/main/CHANGELOG.md
122
138
  documentation_uri: https://rubydoc.info/gems/topical
123
139
  post_install_message:
124
140
  rdoc_options: []
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Topical
4
- module Labelers
5
- # Hybrid labeling that combines term-based and LLM approaches
6
- class Hybrid < Base
7
- def initialize(provider: nil)
8
- @term_labeler = TermBased.new
9
- @llm_labeler = LLMBased.new(provider: provider)
10
- end
11
-
12
- def generate_label(topic)
13
- # Start with term-based label
14
- term_label = @term_labeler.generate_label(topic)
15
-
16
- # Try to enhance with LLM if available
17
- llm_label = @llm_labeler.generate_label(topic)
18
-
19
- # For now, just return the LLM label if different, otherwise term label
20
- llm_label != "LLM Topic #{topic.id}" ? llm_label : term_label
21
- end
22
- end
23
- end
24
- end
@@ -1,126 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Topical
4
- module Labelers
5
- # Adapter to allow different LLM backends (red-candle, remote APIs, etc.)
6
- class LLMAdapter
7
- # Factory method to create appropriate LLM client
8
- def self.create(type: :auto, **options)
9
- case type
10
- when :red_candle
11
- RedCandleAdapter.new(**options)
12
- when :openai
13
- # Future: OpenAIAdapter.new(**options)
14
- raise NotImplementedError, "OpenAI adapter not yet implemented"
15
- when :anthropic
16
- # Future: AnthropicAdapter.new(**options)
17
- raise NotImplementedError, "Anthropic adapter not yet implemented"
18
- when :auto
19
- # Try red-candle first, then fall back to others
20
- begin
21
- RedCandleAdapter.new(**options)
22
- rescue LoadError
23
- nil # No LLM available
24
- end
25
- else
26
- raise ArgumentError, "Unknown LLM type: #{type}"
27
- end
28
- end
29
- end
30
-
31
- # Adapter for red-candle (local LLMs)
32
- class RedCandleAdapter
33
- def initialize(model: nil, **options)
34
- require 'red-candle'
35
-
36
- @model = model || default_model
37
- @options = options
38
- @llm = load_or_create_llm
39
- end
40
-
41
- def generate(prompt:, max_tokens: 100, temperature: 0.3, response_format: nil)
42
- # Red-candle specific generation
43
- response = @llm.generate(
44
- prompt,
45
- max_length: max_tokens,
46
- temperature: temperature,
47
- do_sample: temperature > 0
48
- )
49
-
50
- # Handle JSON response format if requested
51
- if response_format && response_format[:type] == "json_object"
52
- ensure_json_response(response)
53
- else
54
- response
55
- end
56
- end
57
-
58
- def available?
59
- true
60
- end
61
-
62
- private
63
-
64
- def default_model
65
- # Use a small, fast model by default for topic labeling
66
- "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
67
- end
68
-
69
- def load_or_create_llm
70
- # Create new LLM instance with red-candle
71
- RedCandle::Model.new(
72
- model_id: @model,
73
- model_type: :llama,
74
- quantized: true
75
- )
76
- end
77
-
78
- def ensure_json_response(response)
79
- # Try to extract JSON from response
80
- begin
81
- require 'json'
82
- # Look for JSON-like content
83
- json_match = response.match(/\{.*\}/m)
84
- if json_match
85
- JSON.parse(json_match[0])
86
- json_match[0] # Return the JSON string if valid
87
- else
88
- # Generate a basic JSON response
89
- generate_fallback_json(response)
90
- end
91
- rescue JSON::ParserError
92
- generate_fallback_json(response)
93
- end
94
- end
95
-
96
- def generate_fallback_json(text)
97
- # Create a simple JSON from text response
98
- require 'json'
99
- label = text.lines.first&.strip || "Unknown"
100
- {
101
- label: label,
102
- description: text,
103
- confidence: 0.5
104
- }.to_json
105
- end
106
- end
107
-
108
- # Future adapter for remote LLMs
109
- class RemoteAdapter
110
- def initialize(api_key:, endpoint:, **options)
111
- @api_key = api_key
112
- @endpoint = endpoint
113
- @options = options
114
- end
115
-
116
- def generate(prompt:, max_tokens: 100, temperature: 0.3, response_format: nil)
117
- # Make API call
118
- raise NotImplementedError, "Remote LLM adapter coming soon"
119
- end
120
-
121
- def available?
122
- !@api_key.nil?
123
- end
124
- end
125
- end
126
- end
@@ -1,111 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Topical
4
- module Labelers
5
- # LLM-powered topic labeling (requires red-candle or other LLM provider)
6
- class LLMBased < Base
7
- def initialize(provider: nil)
8
- @provider = provider
9
- end
10
-
11
- def generate_label(topic)
12
- unless llm_available?
13
- # Fallback to term-based if LLM not available
14
- return TermBased.new.generate_label(topic)
15
- end
16
-
17
- # Select best documents to send to LLM
18
- sample_docs = topic.representative_docs(k: 3)
19
-
20
- # Generate comprehensive analysis
21
- response = analyze_with_llm(sample_docs, topic.terms)
22
-
23
- response[:label]
24
- rescue => e
25
- # Fallback on error
26
- puts "LLM labeling failed: #{e.message}" if ENV['DEBUG']
27
- TermBased.new.generate_label(topic)
28
- end
29
-
30
- private
31
-
32
- def llm_available?
33
- return true if @provider
34
-
35
- # Try to create LLM adapter
36
- begin
37
- require_relative 'llm_adapter'
38
- @provider = LLMAdapter.create(type: :auto)
39
- @provider && @provider.available?
40
- rescue LoadError, StandardError => e
41
- puts "LLM not available: #{e.message}" if ENV['DEBUG']
42
- false
43
- end
44
- end
45
-
46
- def analyze_with_llm(documents, terms)
47
- prompt = build_analysis_prompt(documents, terms)
48
-
49
- response = @provider.generate(
50
- prompt: prompt,
51
- max_tokens: 150,
52
- temperature: 0.3,
53
- response_format: { type: "json_object" }
54
- )
55
-
56
- # Parse JSON response
57
- require 'json'
58
- result = JSON.parse(response, symbolize_names: true)
59
-
60
- # Validate and clean
61
- {
62
- label: clean_label(result[:label]),
63
- description: result[:description] || "Topic about #{result[:label]}",
64
- themes: result[:themes] || [],
65
- confidence: result[:confidence] || 0.8
66
- }
67
- end
68
-
69
- def build_analysis_prompt(documents, terms)
70
- doc_samples = documents.map.with_index do |doc, i|
71
- preview = doc.length > 300 ? "#{doc[0..300]}..." : doc
72
- "Document #{i + 1}:\n#{preview}"
73
- end.join("\n\n")
74
-
75
- <<~PROMPT
76
- Analyze this cluster of related documents and provide a structured summary.
77
-
78
- Distinctive terms found: #{terms.first(10).join(', ')}
79
-
80
- Sample documents:
81
- #{doc_samples}
82
-
83
- Provide a JSON response with:
84
- {
85
- "label": "A 2-4 word topic label",
86
- "description": "One sentence describing what connects these documents",
87
- "themes": ["theme1", "theme2", "theme3"],
88
- "confidence": 0.0-1.0 score of how coherent this topic is
89
- }
90
-
91
- Focus on what meaningfully connects these documents, not just common words.
92
- PROMPT
93
- end
94
-
95
- def clean_label(label)
96
- return "Unknown Topic" unless label
97
-
98
- # Remove quotes, trim, limit length
99
- cleaned = label.to_s.strip.gsub(/^["']|["']$/, '')
100
- cleaned = cleaned.split("\n").first if cleaned.include?("\n")
101
-
102
- # Limit to reasonable length
103
- if cleaned.length > 50
104
- cleaned[0..47] + "..."
105
- else
106
- cleaned
107
- end
108
- end
109
- end
110
- end
111
- end