cl-magic 0.4.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +16 -2
- data/cl-magic.gemspec +5 -0
- data/lib/cl/magic/cl +21 -14
- data/lib/cl/magic/cl-ai-chat +117 -0
- data/lib/cl/magic/cl-ai-query +116 -0
- data/lib/cl/magic/cl-ai-store-jira +158 -0
- data/lib/cl/magic/cl-dk +23 -456
- data/lib/cl/magic/cl-dk-make +174 -0
- data/lib/cl/magic/cl-dk-make-world +163 -0
- data/lib/cl/magic/cl-dk-parts +253 -0
- data/lib/cl/magic/cl-dk-world +140 -0
- data/lib/cl/magic/cl-jira-fetch +15 -47
- data/lib/cl/magic/cl-jira-fetch-by-epics +112 -0
- data/lib/cl/magic/cl-jira-to-elastic +126 -0
- data/lib/cl/magic/cl-jira-to-markdown +68 -0
- data/lib/cl/magic/{cl-jira-stats → cl-jira-to-stats} +13 -10
- data/lib/cl/magic/cl-kube-logs +3 -1
- data/lib/cl/magic/common/ai_prompt.rb +169 -0
- data/lib/cl/magic/common/ai_text_splitter.rb +78 -0
- data/lib/cl/magic/common/common_options.rb +1 -1
- data/lib/cl/magic/common/elastic.rb +41 -0
- data/lib/cl/magic/common/jira.rb +169 -42
- data/lib/cl/magic/common/milvus.rb +78 -0
- data/lib/cl/magic/dk/help_printer.rb +29 -0
- data/lib/cl/magic/dk/parts_merger.rb +67 -0
- data/lib/cl/magic/dk/world_settings.rb +52 -0
- data/lib/cl/magic/dk/yaml_arg_munger.rb +107 -0
- data/lib/cl/magic/version.rb +1 -1
- metadata +77 -3
@@ -0,0 +1,112 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Fetch jira issues, status changelogs and save them to a file
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'

require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'net/http'
require 'json'

@logger = get_logger()

#
# Features
#

# Fetch epics matching the wildcard, then every issue under those epics,
# enrich the issues with status changelogs and comments, and print the
# whole result as JSON on stdout (callers redirect this into a datafile).
def do_work(options)
  break_at_one_page = false # when developing, set this to true
  jira = Jira.new options[:base_uri], options[:username], options[:token], break_at_one_page

  @logger.puts ""
  @logger.wait "fetch epics"
  epic_ids, _epics = jira.get_epic_ids(options[:project], options[:epic_wildcard])

  @logger.puts ""
  @logger.wait "fetch issues"
  issues = jira.get_issues_by_epic_ids(options[:project], epic_ids)

  @logger.puts ""
  @logger.wait "fetch change logs"
  issues = jira.collect_status_changelogs(jira, issues)

  @logger.puts ""
  @logger.wait "fetch comments"
  issues = jira.collect_comments(jira, issues)

  @logger.puts ""
  puts issues.to_json
end

#
# Options
#

options = {}
global_banner = <<DOC

Fetch jira issues, status changelogs and save them to a file

Usage: cl jira fetch-by-epics [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("--base-uri URI", "base uri for jira (ex. https://company.atlassian.net)") do |v|
    options[:base_uri] = v
  end

  g.on("-u", "--username USERNAME", "jira username") do |v|
    options[:username] = v
  end

  g.on("-t", "--token TOKEN", "jira token (you can create one, google it)") do |v|
    options[:token] = v
  end

  g.on("-p", "--project KEY", "jira project to fetch from") do |v|
    options[:project] = v
  end

  g.on("-w", "--epic-wildcard TEXT", "wildcard to filter the epics by") do |v|
    options[:epic_wildcard] = v
  end

end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# error on token right away; exit non-zero so wrapping scripts can detect it
if options[:token].nil?
  @logger.error "missing --token"
  exit 1
end

# prompt for missing options
ask_and_store_option(options, :base_uri, "base_uri: ")
ask_and_store_option(options, :username, "username: ")
ask_and_store_option(options, :project, "project: ")
ask_and_store_option(options, :epic_wildcard, "epic_wildcard: ")

# display full command (token redacted so it never lands in history)
write_history("""cl jira fetch-by-epics \\
  --base-uri=#{options[:base_uri]} \\
  --username=#{options[:username]} \\
  --project=#{options[:project]} \\
  --epic-wildcard=#{options[:epic_wildcard]} \\
  --token REDACTED
""")

do_work(options)
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Render a jira fetch datafile as markdown, intended to be pushed to elastic
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'
require 'active_support/all'

require 'cl/magic/common/parse_and_pick.rb'
require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'json'

@logger = get_logger()


# POST/PUT json +data+ to +url+ by shelling out to curl; returns curl's stdout.
# NOTE(review): the payload is interpolated into a single-quoted shell string,
# so quotes inside the data will break the command — do not feed untrusted input.
def post(url, verb, data)
  cmd = """
  curl -X#{verb} \
    #{url} \
    -H 'Content-Type: application/json' \
    -d '#{data.to_json}'
  """
  return `#{cmd}`
end

# Create the "jira" index with a single full-text field.
# NOTE(review): @ELASTIC_URL is never assigned anywhere in this script, so this
# helper (like the commented-out do_work below) is work-in-progress — confirm.
def create_index()
  url = "#{@ELASTIC_URL}/jira"

  return post(url, "PUT", {
    "mappings": {
      "properties": {
        "text": {
          "type": "text"
        }
      }
    }
  })
end

# def do_work(options, data)
#   #puts create_index()

#   url = "#{@ELASTIC_URL}/jira/_doc/1"
#   puts post(url, "POST", {
#     "text": "This is a new issue created in Jira"
#   })
# end


#
# Features
#

# Read the jira datafile and print a markdown rendering of every issue
# (header fields, summary, then each comment) to stdout.
def do_work(options)
  filepath = File.join(@working_dir, options[:data_filepath])
  issues = JSON.parse(File.read(filepath))
  issues.each do |issue|

    md = []
    md << "# #{issue['key']}"
    md << "project: #{issue['fields']['project']['key']}"
    md << "created: #{issue['fields']['created']}"
    md << "updated: #{issue['fields']['updated']}"
    md << "status: #{issue['fields']['status']['statusCategory']['name']}" unless issue['fields']["status"].nil?
    # guard priority like the other optional fields: issues without one would raise
    md << "priority: #{issue['fields']['priority']['name']}" unless issue['fields']["priority"].nil?
    md << "labels: #{issue['fields']['labels'].join(',')}"
    md << "issue_type: #{issue['fields']['issuetype']['name']}" unless issue['fields']["issuetype"].nil?
    md << "assignee: #{issue['fields']['assignee']['displayName']}" unless issue['fields']["assignee"].nil?
    md << ""
    md << "## Summary:"
    md << "#{issue['fields']['summary']}"
    # push to elastic

    md << "## Comments"
    md << ""
    issue["comments"].each_with_index do |comment, _i|
      md << "### Comment by #{comment["author"]["displayName"]} "
      md << ""
      md << "created: #{comment["created"]}"
      # jira {noformat} blocks become fenced code blocks
      md << "#{comment["body"].gsub('{noformat}', "\n```\n")}"
      md << ""
    end
    puts md
  end
end

#
# Options
#

options = {}
global_banner = <<DOC

Jira fetch datafile to elastic

Usage: cl jira to-elastic [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
    options[:data_filepath] = v
  end
end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

ask_and_store_option(options, :data_filepath, "data_filepath: ")

# display full command (was recording the sibling "to-markdown" command)
write_history("""cl jira to-elastic \\
  --data-filepath=#{options[:data_filepath]}
""")

do_work(options)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Jira fetch datafile to markdown
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'
require 'active_support/all'

require 'cl/magic/common/parse_and_pick.rb'
require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'json'

@logger = get_logger()

#
# Features
#

# Read the jira datafile and print each issue (and its comments) rendered
# to markdown by the shared Jira helper.
def do_work(options)
  data_path = File.join(@working_dir, options[:data_filepath])
  parsed_issues = JSON.parse(File.read(data_path))
  parsed_issues.each do |raw_issue|
    issue_md, comments = Jira.jira_to_markdown(raw_issue)
    puts issue_md
    puts comments.map { |pair| pair[1] }.join("\n")
  end
end

#
# Options
#

options = {}
global_banner = <<DOC

Jira fetch datafile to markdown

Usage: cl jira to-markdown [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
    options[:data_filepath] = v
  end
end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

ask_and_store_option(options, :data_filepath, "data_filepath: ")

# display full command
write_history("""cl jira to-markdown \\
  --data-filepath=#{options[:data_filepath]}
""")

do_work(options)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
#
|
2
|
+
# Jira fetch datafile to stats
|
3
3
|
require 'optparse'
|
4
4
|
require 'optparse/subcommand'
|
5
5
|
require 'tty-command'
|
@@ -14,25 +14,26 @@ require 'net/http'
|
|
14
14
|
require 'json'
|
15
15
|
|
16
16
|
@logger = get_logger()
|
17
|
-
@cl_cmd_name = File.basename(__FILE__).split('-').join(' ')
|
18
17
|
|
19
18
|
#
|
20
19
|
# Features
|
21
20
|
#
|
22
21
|
|
23
22
|
def get_issues_from_datafile(options)
|
24
|
-
|
23
|
+
final_issues = []
|
25
24
|
filepath = File.join(@working_dir, options[:data_filepath])
|
26
|
-
File.
|
27
|
-
|
25
|
+
issues = JSON.parse(File.read(filepath))
|
26
|
+
|
27
|
+
@logger.info "stats for: #{options[:data_filepath]}"
|
28
|
+
issues.each do |issue|
|
28
29
|
issuetype = issue["fields"]["issuetype"]["name"]
|
29
30
|
labels = issue["fields"]["labels"]
|
30
31
|
|
31
32
|
has_excluded_labels = (labels & options[:exclude_labels]).any?
|
32
33
|
is_excluded_issuetype = options[:exclude_issuetypes].include?(issuetype.downcase)
|
33
|
-
|
34
|
+
final_issues << issue unless has_excluded_labels or is_excluded_issuetype
|
34
35
|
end
|
35
|
-
return
|
36
|
+
return final_issues
|
36
37
|
end
|
37
38
|
|
38
39
|
def in_range_issue_stats(issues, start_date, end_date, options)
|
@@ -137,6 +138,8 @@ end
|
|
137
138
|
def do_work(options)
|
138
139
|
issues = get_issues_from_datafile(options)
|
139
140
|
oldest_date = oldest_issue_date(issues).beginning_of_week
|
141
|
+
@logger.info "starting at #{oldest_date}"
|
142
|
+
|
140
143
|
iter_date_range(oldest_date) do |start_date, end_date|
|
141
144
|
stat_hashes = in_range_issue_stats(issues, start_date, end_date, options)
|
142
145
|
counts = print_stats(stat_hashes, start_date, end_date)
|
@@ -153,9 +156,9 @@ options = {
|
|
153
156
|
}
|
154
157
|
global_banner = <<DOC
|
155
158
|
|
156
|
-
|
159
|
+
Jira fetch datafile to stats
|
157
160
|
|
158
|
-
Usage:
|
161
|
+
Usage: cl jira to-stats [options]
|
159
162
|
|
160
163
|
DOC
|
161
164
|
|
@@ -190,7 +193,7 @@ options[:exclude_issuetypes] = [] if options[:exclude_issuetypes].nil?
|
|
190
193
|
options[:exclude_labels] = [] if options[:exclude_labels].nil?
|
191
194
|
|
192
195
|
# display full command
|
193
|
-
write_history("""
|
196
|
+
write_history("""cl jira to-stats \\
|
194
197
|
--data-filepath=#{options[:data_filepath]} \\
|
195
198
|
--exclude-issuetypes=#{options[:exclude_issuetypes].join(',')} \\
|
196
199
|
--exclude-labels=#{options[:exclude_labels].join(',')}
|
data/lib/cl/magic/cl-kube-logs
CHANGED
@@ -17,8 +17,10 @@ require 'cl/magic/common/kubectl.rb'
|
|
17
17
|
# Features
|
18
18
|
#
|
19
19
|
|
20
|
+
|
20
21
|
def do_work(options, pods, containers)
|
21
|
-
|
22
|
+
container_name_regex = "^(#{containers.collect(&:first).join('|')})$"
|
23
|
+
cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{container_name_regex}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
|
22
24
|
cmd += " | grep #{options[:grep]}" if options[:grep]
|
23
25
|
|
24
26
|
@logger.puts
|
@@ -0,0 +1,169 @@
|
|
1
|
+
|
2
|
+
require 'json'
|
3
|
+
require 'uri'
|
4
|
+
require 'pp'
|
5
|
+
require 'digest'
|
6
|
+
require 'date'
|
7
|
+
|
8
|
+
require 'tty-progressbar'
|
9
|
+
require 'concurrent'
|
10
|
+
|
11
|
+
require 'cl/magic/common/ai_text_splitter.rb'
|
12
|
+
|
13
|
+
|
14
|
+
class AIPrompt
  # Small client for an OpenAI-compatible chat/embeddings endpoint.
  # Requests go out by shelling out to curl; each request/response pair is
  # cached as json under <cache_dir>/.open_ai_cache.
  API_COMPLETIONS_PATH = "/openai/v1/chat/completions"
  API_EMBEDDINGS_PATH = "/openai/v1/embeddings"
  MAX_THREADS = 10 # set to 1 to debug without concurrency

  def initialize(logger, cache_dir, max_chunk_size=10000, temperature=1)
    @cache_dir = cache_dir
    @logger = logger
    @max_chunk_size = max_chunk_size
    @temperature = temperature # NOTE(review): stored but never sent with any request — confirm intent
    @ai_text_splitter = AITextSplitter.new(@max_chunk_size, @logger)
    @mutex = Mutex.new # guards state shared across pool threads (responses, progress bar)
    @thread_pool = Concurrent::ThreadPoolExecutor.new(
      min_threads: 0,
      max_threads: MAX_THREADS,
      max_queue: 0,
      fallback_policy: :caller_runs
    )
  end

  # Return the embedding vector for +input+ (first entry of the API's data array).
  def gen_embeddings(input)
    data = {
      model: "text-embedding-ada-002",
      input: input,
    }
    response = post_open_ai(API_EMBEDDINGS_PATH, data.to_json)
    return response["data"][0]["embedding"]
  end

  # Split +raw_data+ into chunks, run +prompt+ against each chunk, and return
  # one completion text per chunk.
  def prompt(raw_data, prompt, split_as_markdown=false, separator)

    # split
    split_data = @ai_text_splitter.split(raw_data, split_as_markdown, separator)

    # summarize
    responses = summarize_split_text(split_data, prompt, split_as_markdown)

    # map and return
    return responses.collect do |json|
      json["choices"].map {|c| c["message"]["content"]}.join("\n")
    end
  end

  # Delete every cached request/response file.
  def clear_cache()
    Dir.glob(File.join(get_cache_path, '*.json')).each do |file|
      File.delete(file)
    end
  end

  private

  # Run the block on the pool when concurrency is enabled, inline otherwise.
  def do_concurently
    if MAX_THREADS > 1
      @thread_pool.post do
        yield
      end
    else
      yield
    end
  end

  # Block until all posted work has finished.
  # NOTE(review): shutdown is terminal for the executor — a second prompt()
  # on the same instance will no longer run concurrently; instances look
  # intended to be one-shot, confirm against callers.
  def wait_concurrently
    if MAX_THREADS > 1
      @thread_pool.shutdown
      @thread_pool.wait_for_termination
    end
  end

  # When a chunk exceeds the max size, also ask the model to shrink its answer.
  def munge_prompt(text, prompt)
    final_prompt = "#{prompt}"

    if text.length > @max_chunk_size
      half = text.length / 2
      final_prompt = "#{prompt}. Summarize it and keep it under #{half} characters"
    end

    return final_prompt
  end

  # Post one completion request per chunk (concurrently) and collect the
  # parsed json responses. Order of responses is not guaranteed to match
  # the chunk order when running concurrently.
  def summarize_split_text(split_text, prompt, split_as_markdown)

    bar = TTY::ProgressBar.new("processing #{split_text.count} chunks [:bar]", total: split_text.count)

    json_responses = []
    split_text.each do |text|
      do_concurently do
        final_prompt = munge_prompt(text, prompt)
        messages = [
          { role: "user", content: final_prompt },
          { role: "user", content: text }
        ]
        response = post_open_ai(API_COMPLETIONS_PATH, {
          messages: messages
        }.to_json)
        # Array#<< and the progress bar are not thread-safe; serialize access.
        @mutex.synchronize do
          json_responses << response
          bar.advance
        end
      end
    end

    # wait
    wait_concurrently
    return json_responses
  end

  # POST +data+ (a json string) to the endpoint via curl and return the parsed
  # response hash. On any failure the raw response is logged and the process
  # exits non-zero.
  def post_open_ai(endpoint, data)
    # url
    api_url = ENV["OPENAPI_URL"]
    final_url = URI.join(api_url, endpoint)

    # data: drop single quotes so the payload survives the single-quoted -d '...'
    sanitized_data = data.gsub("'", "")

    # post
    api_key = ENV["OPENAPI_KEY"]
    cmd = """
    curl -s -X POST \
      '#{final_url}' \
      -H 'Content-Type: application/json' \
      -H 'Authorization: Bearer #{api_key}' \
      -d '#{sanitized_data}'
    """
    response_text = `#{cmd}`
    begin
      timestamp = DateTime.now.strftime("%Y%m%d%H%M%S")
      response_hash = JSON.parse(response_text)

      # completions must carry choices; treat anything else as an API error
      raise if endpoint == API_COMPLETIONS_PATH and not response_hash.key?("choices")

      # cache both halves under the SAME timestamp so they pair up on disk
      save_to_cache(sanitized_data, timestamp, "request")
      save_to_cache(response_text, timestamp, "response")

      # response
      return response_hash
    rescue => e
      #@logger.error e
      @logger.error response_text
      exit 1
    end
  end

  # Lazily create and return the cache directory.
  def get_cache_path
    cache_path = File.join(@cache_dir, ".open_ai_cache")
    Dir.mkdir(cache_path) if !File.directory?(cache_path)
    return cache_path
  end

  # Persist a json body as a pretty-printed cache file; no-op when caching
  # is disabled (nil cache_dir). Uses the caller-supplied timestamp so a
  # request and its response share a filename prefix.
  def save_to_cache(json_string, timestamp, postfix)
    unless @cache_dir.nil?
      filepath = File.join(get_cache_path, "#{timestamp}_#{postfix}.json")
      File.open(filepath, "w") do |file|
        file.write(JSON.pretty_generate(JSON.parse(json_string)))
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'baran'
|
2
|
+
|
3
|
+
class AITextSplitter
  # Splits large text into chunks sized for an LLM context window.
  # Chunks are allowed to run up to 10% over max_chunk_size (@cut_off)
  # when merging small neighbors back together.

  def initialize(max_chunk_size, logger)
    @max_chunk_size = max_chunk_size
    @cut_off = (@max_chunk_size + (@max_chunk_size * 0.1)).floor
    @logger = logger
  end

  # Split +data+: markdown-aware when requested, otherwise a separator
  # split followed by a size-based merge pass.
  def split(data, split_as_markdown=false, separator)
    if split_as_markdown
      markdown_to_array(data)
    else
      basic_split_then_reduce(data, separator)
    end
  end

  private

  #
  # Separator Variants
  #

  # basic splitter, would lose context when splits got too small
  def basic_splitter(data, separator)
    sep = (separator.nil? or separator.empty?) ? "\n\n" : separator
    splitter = Baran::CharacterTextSplitter.new(chunk_size: @max_chunk_size, chunk_overlap: 64, separator: sep)
    pieces = splitter.chunks(data).collect { |chunk| chunk[:text] }
    reduce_to_max_size(pieces)
  end

  # Preferred: provides even better context through insisting on splits near max_chunk_size
  def basic_split_then_reduce(data, separator)
    reduce_to_max_size(basic_splitter(data, separator))
  end

  # User can hint at split points; it didn't work great
  def recursive_splitter(data, separator)
    separator_list = ([separator] + ["\n\n"]).compact
    splitter = Baran::RecursiveCharacterTextSplitter.new(
      chunk_size: @max_chunk_size, chunk_overlap: 64,
      separators: separator_list
    )
    pieces = splitter.chunks(data).collect { |chunk| chunk[:text] }
    reduce_to_max_size(pieces)
  end

  #
  # Markdown
  #

  def markdown_to_array(data)
    Baran::MarkdownSplitter.new().chunks(data).collect { |chunk| chunk[:text] }
  end

  #
  # Splitting is done by separator and the LLM can respond
  # with content of any length. Let's reduce the chunks by
  # combining smaller responses up to @max_chunk_size
  #

  # Single left-to-right pass: each chunk is merged with its immediate
  # neighbor when their combined length fits under @cut_off.
  def reduce_to_max_size(chunks)
    merged = []
    idx = 0
    while idx < chunks.length
      current = chunks[idx]
      following = chunks[idx + 1]
      if following && (current.length + following.length) <= @cut_off
        merged << "#{current}\n#{following}"
        idx += 2
      else
        merged << current
        idx += 1
      end
    end
    merged
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
class Elastic
  # Minimal Elasticsearch client that shells out to curl.
  def initialize(elastic_url)
    @elastic_url = elastic_url
  end

  # Fetch the documents whose _id is one of +ids+.
  def query_by_id(ids)
    url = "/_search"
    verb = "POST"
    data = {
      query: {
        terms: {
          _id: ids
        }
      }
    }
    return post(url, verb, data)
  end

  # Send +data+ (a Hash) as json to the cluster; returns curl's raw stdout.
  # SECURITY(review): the payload is interpolated into a shell command line —
  # do not feed this untrusted input.
  def post(url, verb, data)
    final_url = "#{@elastic_url}/#{url}"

    # sanitize: escape straight single quotes so they survive the
    # single-quoted -d '...' argument ('\'' is the POSIX idiom). The previous
    # gsub!(c, "\#{c}") inserted the literal text "#{c}", corrupting the json.
    # Curly apostrophes are harmless inside single quotes and are left alone.
    # Block form avoids gsub's \' backreference expansion in the replacement.
    sanitized_data = data.to_json
    sanitized_data = sanitized_data.gsub("'") { "'\\''" }

    # post
    cmd = """
    curl -s -X#{verb} \
      #{final_url} \
      -H 'Content-Type: application/json' \
      -d '#{sanitized_data}'
    """
    return `#{cmd}`
  end

  # Create +elastic_index+ with the given mapping/settings +body+.
  def create_index(elastic_index, body)
    url = "#{elastic_index}"
    return post(url, "PUT", body)
  end
end
|