lex-rfp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +16 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.rubocop.yml +66 -0
- data/CHANGELOG.md +15 -0
- data/CLAUDE.md +80 -0
- data/Dockerfile +6 -0
- data/Gemfile +12 -0
- data/LICENSE +21 -0
- data/README.md +119 -0
- data/lex-rfp.gemspec +32 -0
- data/lib/legion/extensions/rfp/analytics/client.rb +31 -0
- data/lib/legion/extensions/rfp/analytics/helpers/client.rb +24 -0
- data/lib/legion/extensions/rfp/analytics/runners/metrics.rb +87 -0
- data/lib/legion/extensions/rfp/analytics/runners/quality.rb +121 -0
- data/lib/legion/extensions/rfp/analytics/runners/win_rates.rb +88 -0
- data/lib/legion/extensions/rfp/analytics.rb +16 -0
- data/lib/legion/extensions/rfp/generate/client.rb +31 -0
- data/lib/legion/extensions/rfp/generate/helpers/client.rb +24 -0
- data/lib/legion/extensions/rfp/generate/runners/drafts.rb +98 -0
- data/lib/legion/extensions/rfp/generate/runners/sections.rb +97 -0
- data/lib/legion/extensions/rfp/generate/runners/templates.rb +61 -0
- data/lib/legion/extensions/rfp/generate.rb +16 -0
- data/lib/legion/extensions/rfp/ingest/client.rb +31 -0
- data/lib/legion/extensions/rfp/ingest/helpers/client.rb +24 -0
- data/lib/legion/extensions/rfp/ingest/runners/corpus.rb +66 -0
- data/lib/legion/extensions/rfp/ingest/runners/documents.rb +86 -0
- data/lib/legion/extensions/rfp/ingest/runners/parser.rb +84 -0
- data/lib/legion/extensions/rfp/ingest.rb +16 -0
- data/lib/legion/extensions/rfp/review/client.rb +31 -0
- data/lib/legion/extensions/rfp/review/helpers/client.rb +24 -0
- data/lib/legion/extensions/rfp/review/runners/approvals.rb +70 -0
- data/lib/legion/extensions/rfp/review/runners/comments.rb +76 -0
- data/lib/legion/extensions/rfp/review/runners/workflows.rb +86 -0
- data/lib/legion/extensions/rfp/review.rb +16 -0
- data/lib/legion/extensions/rfp/version.rb +9 -0
- data/lib/legion/extensions/rfp.rb +15 -0
- metadata +99 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Rfp
|
|
6
|
+
module Analytics
|
|
7
|
+
module Runners
|
|
8
|
+
module WinRates
|
|
9
|
+
extend Legion::Extensions::Rfp::Analytics::Helpers::Client
|
|
10
|
+
|
|
11
|
+
# Computes the win rate over every proposal that has a final outcome.
#
# Only proposals whose +:outcome+ is +:won+ or +:lost+ count as decided;
# any other outcome is ignored for the rate but still counted in +:total+.
#
# @param proposals [Array<Hash>] proposal records, read via +[:outcome]+
# @return [Hash] +{ result:, won:, decided:, total: }+ where +:result+ is
#   won / decided rounded to 4 decimals, or 0.0 when nothing is decided
def overall_win_rate(proposals:, **)
  decided = proposals.select { |p| %i[won lost].include?(p[:outcome]) }
  won = decided.count { |p| p[:outcome] == :won }
  # Guard the division: with no decided proposals the rate is reported as 0.0.
  rate = decided.empty? ? 0.0 : (won.to_f / decided.length).round(4)

  # Same key set on every path so callers never have to probe for :won / :total.
  { result: rate, won: won, decided: decided.length, total: proposals.length }
end
|
|
18
|
+
|
|
19
|
+
# Computes a win rate per RFP source.
#
# Proposals are grouped by their +:rfp_source+ value; within each group only
# +:won+ / +:lost+ outcomes count as decided.
#
# @param proposals [Array<Hash>] proposal records, read via +[:rfp_source]+
#   and +[:outcome]+
# @return [Hash] +{ result: { source => { rate:, won:, decided: } } }+
def win_rate_by_source(proposals:, **)
  rates = proposals.group_by { |p| p[:rfp_source] }.transform_values do |group|
    decided = group.select { |p| %i[won lost].include?(p[:outcome]) }
    won = decided.count { |p| p[:outcome] == :won }
    # Avoid 0/0 for groups without any decided proposal.
    rate = decided.empty? ? 0.0 : (won.to_f / decided.length).round(4)

    # Every entry carries :won, including groups with nothing decided.
    { rate: rate, won: won, decided: decided.length }
  end

  { result: rates }
end
|
|
31
|
+
|
|
32
|
+
# Computes a win rate per proposal template.
#
# Proposals are grouped by their +:template+ value; within each group only
# +:won+ / +:lost+ outcomes count as decided.
#
# @param proposals [Array<Hash>] proposal records, read via +[:template]+
#   and +[:outcome]+
# @return [Hash] +{ result: { template => { rate:, won:, decided: } } }+
def win_rate_by_template(proposals:, **)
  rates = proposals.group_by { |p| p[:template] }.transform_values do |group|
    decided = group.select { |p| %i[won lost].include?(p[:outcome]) }
    won = decided.count { |p| p[:outcome] == :won }
    # Avoid 0/0 for groups without any decided proposal.
    rate = decided.empty? ? 0.0 : (won.to_f / decided.length).round(4)

    # Every entry carries :won, including groups with nothing decided.
    { rate: rate, won: won, decided: decided.length }
  end

  { result: rates }
end
|
|
44
|
+
|
|
45
|
+
# Buckets proposals by submission period and reports win statistics per bucket.
#
# +:submitted_at+ is read as a String and sliced by position: the first 7
# characters for monthly buckets, the first 4 for yearly ones, and
# +quarter_key+ for quarterly ones. Proposals without +:submitted_at+ land
# in a +nil+ bucket.
#
# @param proposals [Array<Hash>] proposal records, read via
#   +[:submitted_at]+ and +[:outcome]+
# @param period [Symbol, String] +:monthly+ (default) or +:quarterly+; any
#   other value produces yearly buckets
# @return [Hash] +{ result: { bucket => { total:, decided:, won:, rate: } },
#   period: }+ with +:period+ echoed back exactly as given
def trend(proposals:, period: :monthly, **)
  sorted = proposals.sort_by { |p| p[:submitted_at] || '' }
  # Compare by name so a String period is honoured instead of silently
  # falling through to the yearly branch.
  granularity = period.to_s
  grouped = sorted.group_by do |p|
    stamp = p[:submitted_at]
    case granularity
    when 'monthly' then stamp&.slice(0, 7)
    when 'quarterly' then quarter_key(stamp)
    else stamp&.slice(0, 4)
    end
  end

  trend_data = grouped.transform_values do |group|
    decided = group.select { |p| %i[won lost].include?(p[:outcome]) }
    won = decided.count { |p| p[:outcome] == :won }
    {
      total: group.length,
      decided: decided.length,
      won: won,
      rate: decided.empty? ? 0.0 : (won.to_f / decided.length).round(4)
    }
  end

  { result: trend_data, period: period }
end
|
|
69
|
+
|
|
70
|
+
private
|
|
71
|
+
|
|
72
|
+
# Builds a "YYYY-Qn" bucket key from a date string.
#
# The year is taken from characters 0-3 and the month from characters 5-6,
# i.e. the string is sliced by position rather than parsed as a date.
#
# @param date_str [String, nil] date text such as "2024-05-17"
# @return [String, nil] e.g. "2024-Q2"; +nil+ when +date_str+ is nil or its
#   month part is not a number in 1..12
def quarter_key(date_str)
  return nil unless date_str

  year = date_str[0, 4]
  month = date_str[5, 2].to_i
  # A missing or malformed month converts to 0 (or a value above 12); without
  # this guard the integer maths below would emit keys such as "2024-Q0".
  return nil unless month.between?(1, 12)

  "#{year}-Q#{((month - 1) / 3) + 1}"
end
|
|
80
|
+
|
|
81
|
+
include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
|
|
82
|
+
Legion::Extensions::Helpers.const_defined?(:Lex)
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/rfp/analytics/helpers/client'
|
|
4
|
+
require 'legion/extensions/rfp/analytics/runners/metrics'
|
|
5
|
+
require 'legion/extensions/rfp/analytics/runners/win_rates'
|
|
6
|
+
require 'legion/extensions/rfp/analytics/runners/quality'
|
|
7
|
+
require 'legion/extensions/rfp/analytics/client'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
module Rfp
|
|
12
|
+
# Namespace for the analytics part of the RFP extension. The body is
# intentionally empty here; the files required at the top of this file are
# expected to define its helpers, runners and client.
module Analytics
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'helpers/client'
|
|
4
|
+
require_relative 'runners/drafts'
|
|
5
|
+
require_relative 'runners/sections'
|
|
6
|
+
require_relative 'runners/templates'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Rfp
|
|
11
|
+
module Generate
|
|
12
|
+
# Object-style entry point that mixes the Generate helper and the Drafts,
# Sections and Templates runners into a single instance.
class Client
  include Helpers::Client
  include Runners::Drafts
  include Runners::Sections
  include Runners::Templates

  # @return [Hash] connection options kept from construction (nil values dropped)
  attr_reader :opts

  # @param base_url [String, nil] stored under +:base_url+ when not nil
  # @param token [String, nil] stored under +:token+ when not nil
  def initialize(base_url: nil, token: nil, **)
    settings = {}
    settings[:base_url] = base_url unless base_url.nil?
    settings[:token] = token unless token.nil?
    @opts = settings
  end

  # Delegates to the mixed-in +client+ helper using the stored options;
  # keys given in +override+ win over the stored ones.
  def client(**override)
    super(**opts.merge(override))
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'faraday'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Rfp
|
|
8
|
+
module Generate
|
|
9
|
+
module Helpers
|
|
10
|
+
module Client
|
|
11
|
+
# Builds a Faraday connection configured for JSON requests and responses.
#
# @param base_url [String] URL handed to +Faraday.new+
# @param token [String, nil] when truthy, sent as a Bearer Authorization header
# @return [Object] whatever +Faraday.new+ returns
def client(base_url: 'http://localhost:4567', token: nil, **)
  Faraday.new(url: base_url) do |http|
    http.request(:json)
    http.response(:json, content_type: /\bjson$/)

    http.headers['Content-Type'] = 'application/json'
    next unless token

    http.headers['Authorization'] = "Bearer #{token}"
  end
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Rfp
|
|
6
|
+
module Generate
|
|
7
|
+
module Runners
|
|
8
|
+
module Drafts
|
|
9
|
+
extend Legion::Extensions::Rfp::Generate::Helpers::Client
|
|
10
|
+
|
|
11
|
+
# Produces a complete draft by answering every question parsed from the RFP.
#
# Each parsed entry is answered through +generate_section_response+ and the
# answers are joined with a "---" separator line.
#
# @param rfp_text [String] raw RFP text handed to +parse_rfp+
# @param context [Hash] extra context forwarded to each section call
# @param model [Object, nil] forwarded to each section call
# @return [Hash] +{ result:, sections:, questions_answered: }+; both counts
#   equal the number of generated answers
def generate_full_draft(rfp_text:, context: {}, model: nil, **)
  answered = parse_rfp(rfp_text).map do |item|
    generate_section_response(
      question: item[:question],
      section: item[:section],
      context: context,
      model: model
    )
  end
  answer_count = answered.length
  body = answered.map { |entry| entry[:result] }.join("\n\n---\n\n")

  { result: body, sections: answer_count, questions_answered: answer_count }
end
|
|
21
|
+
|
|
22
|
+
# Answers a single RFP question.
#
# Reference material is looked up with +retrieve_context+, folded into a
# prompt by +build_prompt+, and the prompt is sent through +call_llm+.
#
# @param question [String] the question to answer
# @param context [Hash] extra context for the prompt
# @param model [Object, nil] forwarded to +call_llm+
# @param scope [Symbol] retrieval scope forwarded to +retrieve_context+
# @return [Hash] +{ result:, context_used:, question: }+
def generate_response(question:, context: {}, model: nil, scope: :all, **)
  references = retrieve_context(question: question, scope: scope)
  llm_reply = call_llm(
    prompt: build_prompt(question: question, context: context, retrieved: references),
    model: model
  )

  { result: llm_reply, context_used: references.length, question: question }
end
|
|
29
|
+
|
|
30
|
+
# Rewrites a previous answer in light of reviewer feedback.
#
# @param question [String] the original question
# @param previous_answer [String] the answer being revised
# @param feedback [String] reviewer feedback to incorporate
# @param context [Hash] extra context for the revision prompt
# @param model [Object, nil] forwarded to +call_llm+
# @return [Hash] +{ result:, question:, revision: true }+
def regenerate(question:, previous_answer:, feedback:, context: {}, model: nil, **)
  revision_prompt = build_revision_prompt(question: question, previous: previous_answer,
                                          feedback: feedback, context: context)
  revised = call_llm(prompt: revision_prompt, model: model)

  { result: revised, question: question, revision: true }
end
|
|
41
|
+
|
|
42
|
+
private
|
|
43
|
+
|
|
44
|
+
# Extracts the question list from raw RFP text.
#
# The Ingest parser runner is mixed into a throwaway object so that its
# +parse_rfp_questions+ can be called without including the runner here.
#
# @param text [String] raw RFP text
# @return [Object] the +:result+ entry of the parser's return value
def parse_rfp(text)
  parser = Object.new.extend(Legion::Extensions::Rfp::Ingest::Runners::Parser)
  parser.parse_rfp_questions(text: text)[:result]
end
|
|
50
|
+
|
|
51
|
+
# Looks up reference material for a question through Legion::Apollo.
#
# @param question [String] used as the retrieval query
# @param scope [Symbol] forwarded to +Legion::Apollo.retrieve+
# @return [Array] the retrieved entries (at most 5 are requested); an empty
#   array when Legion::Apollo is not defined or returns a non-Array
def retrieve_context(question:, scope:)
  if defined?(Legion::Apollo)
    hits = Legion::Apollo.retrieve(query: question, scope: scope, limit: 5)
    return hits if hits.is_a?(Array)
  end

  []
end
|
|
57
|
+
|
|
58
|
+
# Assembles the LLM prompt for answering one RFP question.
#
# @param question [String] the question to answer
# @param context [Hash] extra context; included via +inspect+ unless empty
# @param retrieved [Array<Hash>] reference documents, each read via
#   +[:content]+ with a fallback to +['content']+
# @return [String] the prompt, parts joined with newlines
def build_prompt(question:, context:, retrieved:)
  parts = ['You are an expert proposal writer for a healthcare organization.']

  # Only announce reference material when some was actually retrieved;
  # otherwise the prompt would point the model at references that are absent.
  unless retrieved.empty?
    parts << 'Use the following reference material to craft your response:'
    retrieved.each_with_index do |doc, idx|
      parts << "\n--- Reference #{idx + 1} ---\n#{doc[:content] || doc['content']}"
    end
  end

  parts << "\nAdditional context: #{context.inspect}" unless context.empty?
  parts << "\nQuestion: #{question}"
  parts << "\nProvide a professional, detailed response suitable for an RFP submission."
  parts.join("\n")
end
|
|
71
|
+
|
|
72
|
+
# Assembles the LLM prompt for revising an earlier answer.
#
# @param question [String] the original question
# @param previous [String] the answer being revised
# @param feedback [String] reviewer feedback
# @param context [Hash] extra context; included via +inspect+ unless empty
# @return [String] the prompt, parts joined with newlines
def build_revision_prompt(question:, previous:, feedback:, context:)
  context_line = context.empty? ? nil : "\nAdditional context: #{context.inspect}"

  [
    'You are revising an RFP response based on reviewer feedback.',
    "\nOriginal question: #{question}",
    "\nPrevious answer:\n#{previous}",
    "\nReviewer feedback: #{feedback}",
    context_line,
    "\nProvide an improved response incorporating the feedback."
  ].compact.join("\n")
end
|
|
81
|
+
|
|
82
|
+
# Sends a prompt to Legion::LLM and normalises the reply to a String.
#
# When Legion::LLM is not defined, a placeholder containing the start of the
# prompt is returned instead. The +model+ argument is accepted but not used.
#
# @param prompt [String] prompt text
# @param model [Object, nil] currently ignored
# @return [String, Object] for a Hash reply, its +:content+, else +:result+,
#   else the Hash's +to_s+; for any other reply, its +to_s+
def call_llm(prompt:, model: nil) # rubocop:disable Lint/UnusedMethodArgument
  return "[LLM not available] Prompt: #{prompt[0..100]}..." unless defined?(Legion::LLM)

  reply = Legion::LLM.ask(message: prompt)
  return reply.to_s unless reply.is_a?(Hash)

  reply[:content] || reply[:result] || reply.to_s
end
|
|
90
|
+
|
|
91
|
+
include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
|
|
92
|
+
Legion::Extensions::Helpers.const_defined?(:Lex)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Rfp
|
|
6
|
+
module Generate
|
|
7
|
+
module Runners
|
|
8
|
+
module Sections
|
|
9
|
+
extend Legion::Extensions::Rfp::Generate::Helpers::Client
|
|
10
|
+
|
|
11
|
+
# Answers one question in the context of a named proposal section.
#
# @param question [String] the question to answer
# @param section [String, Symbol, nil] section label, if any
# @param context [Hash] extra context for the prompt
# @param model [Object, nil] forwarded to +call_section_llm+
# @param scope [Symbol] retrieval scope forwarded to +retrieve_section_context+
# @return [Hash] +{ result:, section:, question:, context_used: }+
def generate_section_response(question:, section: nil, context: {}, model: nil, scope: :all, **)
  references = retrieve_section_context(question: question, section: section, scope: scope)
  section_prompt = build_section_prompt(
    question: question,
    section: section,
    context: context,
    retrieved: references
  )
  reply = call_section_llm(prompt: section_prompt, model: model)

  { result: reply, section: section, question: question, context_used: references.length }
end
|
|
24
|
+
|
|
25
|
+
# Generates an executive summary for an RFP response.
#
# @param rfp_text [String] RFP text handed to the prompt builder
# @param company_context [Hash] company details for the prompt
# @param model [Object, nil] forwarded to +call_section_llm+
# @return [Hash] +{ result:, type: :executive_summary }+
def generate_executive_summary(rfp_text:, company_context: {}, model: nil, **)
  summary = call_section_llm(
    prompt: build_executive_summary_prompt(rfp_text: rfp_text, company_context: company_context),
    model: model
  )

  { result: summary, type: :executive_summary }
end
|
|
30
|
+
|
|
31
|
+
# Generates a compliance matrix for a list of requirements.
#
# @param requirements [Array] requirements handed to the prompt builder
# @param capabilities [Hash] capability details for the prompt
# @param model [Object, nil] forwarded to +call_section_llm+
# @return [Hash] +{ result:, type: :compliance_matrix, requirements_count: }+
def generate_compliance_matrix(requirements:, capabilities: {}, model: nil, **)
  matrix_prompt = build_compliance_prompt(requirements: requirements, capabilities: capabilities)
  matrix = call_section_llm(prompt: matrix_prompt, model: model)

  { result: matrix, type: :compliance_matrix, requirements_count: requirements.length }
end
|
|
36
|
+
|
|
37
|
+
private
|
|
38
|
+
|
|
39
|
+
# Looks up reference material for a section question through Legion::Apollo.
#
# The query is the section and question joined with " - "; nil parts are
# left out.
#
# @param question [String] the question text
# @param section [String, Symbol, nil] section label, if any
# @param scope [Symbol] forwarded to +Legion::Apollo.retrieve+
# @return [Array] the retrieved entries (at most 5 are requested); an empty
#   array when Legion::Apollo is not defined or returns a non-Array
def retrieve_section_context(question:, section:, scope:)
  if defined?(Legion::Apollo)
    search = [section, question].reject(&:nil?).join(' - ')
    found = Legion::Apollo.retrieve(query: search, scope: scope, limit: 5)
    return found if found.is_a?(Array)
  end

  []
end
|
|
46
|
+
|
|
47
|
+
# Assembles the LLM prompt for one section-specific answer.
#
# @param question [String] the question to answer
# @param section [String, Symbol, nil] section label; omitted when falsy
# @param context [Hash] extra context; included via +inspect+ unless empty
# @param retrieved [Array<Hash>] reference documents, each read via
#   +[:content]+ with a fallback to +['content']+
# @return [String] the prompt, parts joined with newlines
def build_section_prompt(question:, section:, context:, retrieved:)
  lines = ['You are writing a specific section of an RFP response.']
  lines.push("Section: #{section}") if section

  references = retrieved.each_with_index.map do |doc, position|
    "\n--- Reference #{position + 1} ---\n#{doc[:content] || doc['content']}"
  end
  lines.concat(references)

  lines.push("\nAdditional context: #{context.inspect}") unless context.empty?
  lines.push("\nQuestion: #{question}")
  lines.push("\nProvide a focused, professional response for this section.")
  lines.join("\n")
end
|
|
60
|
+
|
|
61
|
+
# Assembles the LLM prompt for an executive summary.
#
# Only the slice +rfp_text[0..2000]+ is embedded in the prompt.
#
# @param rfp_text [String] RFP text
# @param company_context [Hash] company details; included via +inspect+
#   unless empty
# @return [String] the prompt, parts joined with newlines
def build_executive_summary_prompt(rfp_text:, company_context:)
  company_line = "\nCompany context: #{company_context.inspect}" unless company_context.empty?

  [
    'Write an executive summary for the following RFP response.',
    company_line,
    "\nRFP overview:\n#{rfp_text[0..2000]}",
    "\nWrite a compelling 2-3 paragraph executive summary."
  ].compact.join("\n")
end
|
|
68
|
+
|
|
69
|
+
# Assembles the LLM prompt for a compliance matrix.
#
# @param requirements [Array<Hash, String, Object>] numbered in order; a
#   Hash contributes its +:text+ (or +'text'+) entry when present, any other
#   value is interpolated as-is
# @param capabilities [Hash] capability details; included via +inspect+
#   unless empty
# @return [String] the prompt, parts joined with newlines
def build_compliance_prompt(requirements:, capabilities:)
  parts = ['Generate a compliance matrix for the following requirements.']
  parts << "\nCapabilities: #{capabilities.inspect}" unless capabilities.empty?

  requirements.each_with_index do |req, idx|
    # Only probe hashes for a text entry: String#[] raises TypeError when
    # given a Symbol, which made the plain-string fallback unreachable.
    text = req.is_a?(Hash) ? (req[:text] || req['text'] || req) : req
    parts << "#{idx + 1}. #{text}"
  end

  parts << "\nFor each requirement, indicate: Compliant, Partially Compliant, or Non-Compliant with explanation."
  parts.join("\n")
end
|
|
80
|
+
|
|
81
|
+
# Sends a section prompt to Legion::LLM and normalises the reply to a String.
#
# When Legion::LLM is not defined, a placeholder containing the start of the
# prompt is returned instead. The +model+ argument is accepted but not used.
#
# @param prompt [String] prompt text
# @param model [Object, nil] currently ignored
# @return [String, Object] for a Hash reply, its +:content+, else +:result+,
#   else the Hash's +to_s+; for any other reply, its +to_s+
def call_section_llm(prompt:, model: nil) # rubocop:disable Lint/UnusedMethodArgument
  unless defined?(Legion::LLM)
    return "[LLM not available] Prompt: #{prompt[0..100]}..."
  end

  answer = Legion::LLM.ask(message: prompt)
  text = (answer[:content] || answer[:result]) if answer.is_a?(Hash)
  text || answer.to_s
end
|
|
89
|
+
|
|
90
|
+
include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
|
|
91
|
+
Legion::Extensions::Helpers.const_defined?(:Lex)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Rfp
|
|
6
|
+
module Generate
|
|
7
|
+
module Runners
|
|
8
|
+
module Templates
|
|
9
|
+
extend Legion::Extensions::Rfp::Generate::Helpers::Client
|
|
10
|
+
|
|
11
|
+
# Built-in proposal templates keyed by name. Each entry lists its section
# keys in order and a tone. The nested hashes and arrays are frozen too,
# because entries are handed out directly and a caller mutating one would
# otherwise change the shared template.
DEFAULT_TEMPLATES = {
  standard: {
    sections: %i[executive_summary company_overview approach timeline pricing].freeze,
    tone: :formal
  }.freeze,
  government: {
    sections: %i[executive_summary compliance technical_approach management staffing pricing].freeze,
    tone: :formal
  }.freeze,
  healthcare: {
    sections: %i[executive_summary clinical_approach quality_measures compliance network
                 implementation pricing].freeze,
    tone: :formal
  }.freeze
}.freeze
|
|
18
|
+
|
|
19
|
+
# Lists the names of the built-in templates.
#
# @return [Hash] +{ result: [names], count: }+
def list_templates(**)
  names = DEFAULT_TEMPLATES.keys
  { result: names, count: names.length }
end
|
|
22
|
+
|
|
23
|
+
# Fetches one built-in template by name.
#
# @param name [String, Symbol] template name; converted with +to_sym+
# @return [Hash] +{ result: template, name: }+ when found, otherwise
#   +{ result: nil, error: }+
def get_template(name:, **)
  found = DEFAULT_TEMPLATES[name.to_sym]

  if found
    { result: found, name: name }
  else
    { result: nil, error: "Template not found: #{name}" }
  end
end
|
|
29
|
+
|
|
30
|
+
# Lays RFP data out according to a built-in template.
#
# For every section key of the template an outline entry is produced whose
# content is looked up in +rfp_data+ under that same key.
#
# @param name [String, Symbol] template name; converted with +to_sym+
# @param rfp_data [Hash] content indexed by section key
# @return [Hash] +{ result: outline, template:, sections: }+ when the
#   template exists, otherwise +{ result: nil, error: }+
def apply_template(name:, rfp_data:, **)
  chosen = DEFAULT_TEMPLATES[name.to_sym]
  return { result: nil, error: "Template not found: #{name}" } unless chosen

  tone = chosen[:tone]
  entries = chosen[:sections].map do |key|
    { section: key, tone: tone, content: rfp_data[key] }
  end

  { result: entries, template: name, sections: entries.length }
end
|
|
40
|
+
|
|
41
|
+
# Suggests a built-in template from keywords found in the RFP text.
#
# Healthcare keywords are checked first, then government keywords; when
# neither matches, +:standard+ is suggested.
#
# @param rfp_text [String] raw RFP text
# @return [Hash] +{ result: :healthcare | :government | :standard,
#   confidence: :heuristic }+
def suggest_template(rfp_text:, **)
  text_lower = rfp_text.downcase
  suggested = if text_lower.match?(/\b(?:medicare|medicaid|clinical|hipaa|phi|health|healthcare)\b/)
                # "healthcare" is listed explicitly: \bhealth\b alone does not
                # match it because there is no word boundary after "health".
                :healthcare
              elsif text_lower.match?(/\b(?:federal|government|agency|cfr|dfars)\b/) ||
                    rfp_text.match?(/\bFAR\b/)
                # The FAR acronym is matched case-sensitively on the original
                # text so that the everyday word "far" does not select this branch.
                :government
              else
                :standard
              end

  { result: suggested, confidence: :heuristic }
end
|
|
53
|
+
|
|
54
|
+
include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
|
|
55
|
+
Legion::Extensions::Helpers.const_defined?(:Lex)
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/rfp/generate/helpers/client'
|
|
4
|
+
require 'legion/extensions/rfp/generate/runners/drafts'
|
|
5
|
+
require 'legion/extensions/rfp/generate/runners/sections'
|
|
6
|
+
require 'legion/extensions/rfp/generate/runners/templates'
|
|
7
|
+
require 'legion/extensions/rfp/generate/client'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
module Rfp
|
|
12
|
+
# Namespace for the response-generation part of the RFP extension. The body
# is intentionally empty here; the files required at the top of this file
# are expected to define its helpers, runners and client.
module Generate
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'helpers/client'
|
|
4
|
+
require_relative 'runners/documents'
|
|
5
|
+
require_relative 'runners/corpus'
|
|
6
|
+
require_relative 'runners/parser'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Rfp
|
|
11
|
+
module Ingest
|
|
12
|
+
# Object-style entry point that mixes the Ingest helper and the Documents,
# Corpus and Parser runners into a single instance.
class Client
  include Helpers::Client
  include Runners::Documents
  include Runners::Corpus
  include Runners::Parser

  # @return [Hash] connection options kept from construction (nil values dropped)
  attr_reader :opts

  # @param base_url [String, nil] kept under +:base_url+ unless nil
  # @param token [String, nil] kept under +:token+ unless nil
  def initialize(base_url: nil, token: nil, **)
    given = { base_url: base_url, token: token }
    @opts = given.reject { |_key, value| value.nil? }
  end

  # Delegates to the mixed-in +client+ helper with the stored options;
  # per-call keys in +override+ take precedence.
  def client(**override)
    merged = @opts.merge(override)
    super(**merged)
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'faraday'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Rfp
|
|
8
|
+
module Ingest
|
|
9
|
+
module Helpers
|
|
10
|
+
module Client
|
|
11
|
+
# Builds a Faraday connection configured for JSON requests and responses.
#
# @param base_url [String] URL handed to +Faraday.new+
# @param token [String, nil] when truthy, sent as a Bearer Authorization header
# @return [Object] whatever +Faraday.new+ returns
def client(base_url: 'http://localhost:4567', token: nil, **)
  default_headers = { 'Content-Type' => 'application/json' }
  default_headers['Authorization'] = "Bearer #{token}" if token

  Faraday.new(url: base_url) do |faraday|
    faraday.request :json
    faraday.response :json, content_type: /\bjson$/
    default_headers.each { |header, value| faraday.headers[header] = value }
  end
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Rfp
|
|
6
|
+
module Ingest
|
|
7
|
+
module Runners
|
|
8
|
+
module Corpus
|
|
9
|
+
extend Legion::Extensions::Rfp::Ingest::Helpers::Client
|
|
10
|
+
|
|
11
|
+
# Turns one document into a list of tagged text chunks.
#
# The file is checked with +supported?+, its text extracted with
# +extract_text+ and split with +chunk_text+ (default chunking settings).
#
# @param file_path [String] path of the document
# @param tags [Array] tags copied onto every chunk record
# @param metadata [Hash] base metadata; each record adds +:format+ and +:offset+
# @return [Hash] +{ result: records, count:, source: }+, or
#   +{ result: nil, error: }+ for an unsupported format
def ingest_document(file_path:, tags: [], metadata: {}, **)
  unless supported?(file_path: file_path)[:result]
    return { result: nil, error: "Unsupported format: #{file_path}" }
  end

  extraction = extract_text(file_path: file_path)
  pieces = chunk_text(text: extraction[:result])[:result]

  records = pieces.each_with_index.map do |piece, position|
    {
      content: piece[:text],
      source: file_path,
      chunk_id: position,
      tags: tags,
      metadata: metadata.merge(format: extraction[:format], offset: piece[:offset])
    }
  end

  { result: records, count: records.length, source: file_path }
end
|
|
30
|
+
|
|
31
|
+
# Ingests every supported file found under a directory.
#
# @param directory [String] directory to scan
# @param tags [Array] tags forwarded to +ingest_document+
# @param recursive [Boolean] scan sub-directories as well when truthy
# @return [Hash] +{ result: [per-file results], files_processed:,
#   total_chunks: }+ where +:total_chunks+ sums each result's +:count+
def ingest_directory(directory:, tags: [], recursive: true, **)
  glob_parts = recursive ? [directory, '**', '*'] : [directory, '*']
  candidates = Dir.glob(::File.join(*glob_parts)).select do |path|
    ::File.file?(path) && supported?(file_path: path)[:result]
  end

  documents = candidates.filter_map { |path| ingest_document(file_path: path, tags: tags) }
  chunk_total = documents.sum { |doc| doc[:count] }

  { result: documents, files_processed: documents.length, total_chunks: chunk_total }
end
|
|
43
|
+
|
|
44
|
+
# Pushes chunk records into Legion::Apollo, one call per chunk.
#
# @param chunks [Array<Hash>] records read via +[:content]+, +[:tags]+ and
#   +[:metadata]+ (missing tags / metadata default to +[]+ / +{}+)
# @param scope [Symbol] forwarded to +Legion::Apollo.ingest+
# @return [Hash] +{ result: [ingest return values], count: }+, or
#   +{ result: nil, error: 'Apollo not available' }+ when Legion::Apollo is
#   not defined
def ingest_to_apollo(chunks:, scope: :global, **)
  if defined?(Legion::Apollo)
    stored = chunks.map do |piece|
      Legion::Apollo.ingest(
        content: piece[:content],
        tags: piece[:tags] || [],
        metadata: piece[:metadata] || {},
        scope: scope
      )
    end

    { result: stored, count: stored.length }
  else
    { result: nil, error: 'Apollo not available' }
  end
end
|
|
58
|
+
|
|
59
|
+
include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
|
|
60
|
+
Legion::Extensions::Helpers.const_defined?(:Lex)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Rfp
|
|
6
|
+
module Ingest
|
|
7
|
+
module Runners
|
|
8
|
+
module Documents
|
|
9
|
+
extend Legion::Extensions::Rfp::Ingest::Helpers::Client
|
|
10
|
+
|
|
11
|
+
# File extensions (lower-case, without the leading dot) that +supported?+
# reports as ingestible.
SUPPORTED_FORMATS = %w[pdf docx md markdown xlsx html htm].freeze
|
|
12
|
+
|
|
13
|
+
# Reports whether a file's extension is one of SUPPORTED_FORMATS.
#
# Note that the return value is a Hash (always truthy); callers read +:result+.
#
# @param file_path [#to_s] path whose extension is inspected
# @return [Hash] +{ result: true|false, format: extension }+ with the
#   extension lower-cased and stripped of dots
def supported?(file_path:, **)
  extension = ::File.extname(file_path.to_s).downcase.delete('.')
  known = SUPPORTED_FORMATS.include?(extension)

  { result: known, format: extension }
end
|
|
17
|
+
|
|
18
|
+
# Extracts plain text from a document.
#
# @param file_path [String] path of the document
# @param format [String, Symbol, nil] explicit format; when nil the file
#   extension is used. Either way the value is normalised (dots removed,
#   lower-cased) so +:md+, +'MD'+ and +'.md'+ are all accepted
# @return [Hash] +{ result: text, format: normalised format, size: text length }+
# @raise [ArgumentError] when the format is not one handled below
def extract_text(file_path:, format: nil, **)
  # Apply the same normalisation to an explicit format as to a derived
  # extension, so the case statement sees one canonical spelling.
  fmt = (format || ::File.extname(file_path.to_s)).to_s.delete('.').downcase
  content = case fmt
            when 'pdf' then extract_pdf(file_path)
            when 'docx' then extract_docx(file_path)
            when 'md', 'markdown' then ::File.read(file_path)
            when 'xlsx' then extract_xlsx(file_path)
            when 'html', 'htm' then extract_html(file_path)
            else raise ArgumentError, "Unsupported format: #{fmt}"
            end

  { result: content, format: fmt, size: content.length }
end
|
|
30
|
+
|
|
31
|
+
# Splits text into fixed-size, overlapping chunks.
#
# Consecutive chunks start +chunk_size - overlap+ characters apart, so each
# chunk repeats the last +overlap+ characters of the previous one.
#
# @param text [String, nil] text to split
# @param chunk_size [Integer] maximum characters per chunk; must be positive
# @param overlap [Integer] characters shared between neighbours; must be
#   non-negative and smaller than +chunk_size+
# @return [Hash] +{ result: [{ text:, offset:, length: }], count: }+; an
#   empty result for nil or empty text
# @raise [ArgumentError] when the sizes would not advance through the text
def chunk_text(text:, chunk_size: 1000, overlap: 200, **)
  return { result: [], count: 0 } if text.nil? || text.empty?

  stride = chunk_size - overlap
  # A non-positive stride would never move forward (an endless loop), and a
  # negative overlap would silently skip text between chunks.
  unless chunk_size.positive? && overlap >= 0 && stride.positive?
    raise ArgumentError,
          "overlap (#{overlap}) must be non-negative and smaller than chunk_size (#{chunk_size})"
  end

  chunks = (0...text.length).step(stride).map do |pos|
    piece = text[pos, chunk_size]
    { text: piece, offset: pos, length: piece.length }
  end

  { result: chunks, count: chunks.length }
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
# Extracts text from a PDF through Legion::Data::Extract.
#
# @param file_path [String] path of the PDF
# @return [Object] the extractor's return value, or a placeholder String
#   naming the file when Legion::Data::Extract is not defined
def extract_pdf(file_path)
  return "[PDF extraction requires legion-data] #{file_path}" unless defined?(Legion::Data::Extract)

  Legion::Data::Extract.call(file_path, :pdf)
end
|
|
53
|
+
|
|
54
|
+
# Extracts text from a DOCX file through Legion::Data::Extract.
#
# @param file_path [String] path of the document
# @return [Object] the extractor's return value, or a placeholder String
#   naming the file when Legion::Data::Extract is not defined
def extract_docx(file_path)
  extractor_loaded = defined?(Legion::Data::Extract)
  return Legion::Data::Extract.call(file_path, :docx) if extractor_loaded

  "[DOCX extraction requires legion-data] #{file_path}"
end
|
|
61
|
+
|
|
62
|
+
# Extracts text from an XLSX workbook through Legion::Data::Extract.
#
# @param file_path [String] path of the workbook
# @return [Object] the extractor's return value, or a placeholder String
#   naming the file when Legion::Data::Extract is not defined
def extract_xlsx(file_path)
  unless defined?(Legion::Data::Extract)
    return "[Excel extraction requires legion-data] #{file_path}"
  end

  Legion::Data::Extract.call(file_path, :xlsx)
end
|
|
69
|
+
|
|
70
|
+
# Reads an HTML file and reduces it to whitespace-normalised text.
#
# Script and style elements are removed together with their contents, every
# remaining tag becomes a space, and runs of whitespace collapse to a single
# space. Character entities are left as they appear in the file.
#
# @param file_path [String] path of the HTML file
# @return [String] the stripped text
def extract_html(file_path)
  substitutions = [
    [%r{<script[^>]*>.*?</script>}mi, ''],
    [%r{<style[^>]*>.*?</style>}mi, ''],
    [/<[^>]+>/, ' '],
    [/\s+/, ' ']
  ]

  raw = ::File.read(file_path)
  # Order matters: script/style bodies must go before the generic tag pass.
  cleaned = substitutions.reduce(raw) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
  cleaned.strip
end
|
|
78
|
+
|
|
79
|
+
include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
|
|
80
|
+
Legion::Extensions::Helpers.const_defined?(:Lex)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|