cl-magic 0.4.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +16 -2
- data/cl-magic.gemspec +5 -0
- data/lib/cl/magic/cl +21 -14
- data/lib/cl/magic/cl-ai-chat +117 -0
- data/lib/cl/magic/cl-ai-query +116 -0
- data/lib/cl/magic/cl-ai-store-jira +158 -0
- data/lib/cl/magic/cl-dk +23 -456
- data/lib/cl/magic/cl-dk-make +174 -0
- data/lib/cl/magic/cl-dk-make-world +163 -0
- data/lib/cl/magic/cl-dk-parts +253 -0
- data/lib/cl/magic/cl-dk-world +140 -0
- data/lib/cl/magic/cl-jira-fetch +15 -47
- data/lib/cl/magic/cl-jira-fetch-by-epics +112 -0
- data/lib/cl/magic/cl-jira-to-elastic +126 -0
- data/lib/cl/magic/cl-jira-to-markdown +68 -0
- data/lib/cl/magic/{cl-jira-stats → cl-jira-to-stats} +13 -10
- data/lib/cl/magic/cl-kube-logs +3 -1
- data/lib/cl/magic/common/ai_prompt.rb +169 -0
- data/lib/cl/magic/common/ai_text_splitter.rb +78 -0
- data/lib/cl/magic/common/common_options.rb +1 -1
- data/lib/cl/magic/common/elastic.rb +41 -0
- data/lib/cl/magic/common/jira.rb +169 -42
- data/lib/cl/magic/common/milvus.rb +78 -0
- data/lib/cl/magic/dk/help_printer.rb +29 -0
- data/lib/cl/magic/dk/parts_merger.rb +67 -0
- data/lib/cl/magic/dk/world_settings.rb +52 -0
- data/lib/cl/magic/dk/yaml_arg_munger.rb +107 -0
- data/lib/cl/magic/version.rb +1 -1
- metadata +77 -3
@@ -0,0 +1,112 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Fetch jira issues, status changelogs and save them to a file
#
# Pulls epics matching a wildcard from a Jira project, fetches the issues
# under those epics plus their status changelogs and comments, and prints
# the combined result as JSON to stdout (callers redirect to a datafile).
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'

require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'net/http'
require 'json'

@logger = get_logger()

#
# Features
#

# Fetch epics -> issues -> changelogs -> comments, then dump JSON to stdout.
# options: hash with :base_uri, :username, :token, :project, :epic_wildcard.
def do_work(options)
  break_at_one_page = false # when developing, set this to true
  jira = Jira.new options[:base_uri], options[:username], options[:token], break_at_one_page

  @logger.puts ""
  @logger.wait "fetch epics"
  # epics (the full records) is unused below; only the ids feed the issue query
  epic_ids, epics = jira.get_epic_ids(options[:project], options[:epic_wildcard])

  @logger.puts ""
  @logger.wait "fetch issues"
  issues = jira.get_issues_by_epic_ids(options[:project], epic_ids)

  @logger.puts ""
  @logger.wait "fetch change logs"
  # NOTE(review): jira is passed into its own instance method — presumably the
  # helper's signature wants an explicit client handle; confirm in common/jira.rb
  issues = jira.collect_status_changelogs(jira, issues)

  @logger.puts ""
  @logger.wait "fetch comments"
  issues = jira.collect_comments(jira, issues)

  @logger.puts ""
  # stdout is the "file": redirect this JSON to create the datafile
  puts issues.to_json
end

#
# Options
#

options = {}
global_banner = <<DOC

Fetch jira issues, status changelogs and save them to a file

Usage: cl jira fetch-by-epics [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("--base-uri URI", "base uri for jira (ex. https://company.atlassian.net)") do |v|
    options[:base_uri] = v
  end

  g.on("-u", "--username USERNAME", "jira username") do |v|
    options[:username] = v
  end

  g.on("-t", "--token TOKEN", "jira token (you can create one, google it)") do |v|
    options[:token] = v
  end

  g.on("-p", "--project KEY", "jira project to fetch from") do |v|
    options[:project] = v
  end

  g.on("-w", "--epic-wildcard TEXT", "wildcard to filter the epics by") do |v|
    options[:epic_wildcard] = v
  end

end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# error on token right away
# (the token is the one option we refuse to prompt for interactively)
if options[:token].nil?
  @logger.error "missing --token"
  exit
end

# prompt for missing options
ask_and_store_option(options, :base_uri, "base_uri: ")
ask_and_store_option(options, :username, "username: ")
ask_and_store_option(options, :project, "project: ")
ask_and_store_option(options, :epic_wildcard, "epic_wildcard: ")

# display full command
# (token is redacted so the history file never stores the secret)
write_history("""cl jira fetch-by-epics \\
  --base-uri=#{options[:base_uri]} \\
  --username=#{options[:username]} \\
  --project=#{options[:project]} \\
  --epic-wildcard=#{options[:epic_wildcard]} \\
  --token REDACTED
""")

do_work(options)
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Jira fetch datafile to markdown
#
# NOTE(review): this file ships as cl-jira-to-elastic, but its banner, history
# line, and active code path all render the datafile as markdown to stdout.
# The elastic helpers below are dead/commented scaffolding — confirm intent.
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'
require 'active_support/all'

require 'cl/magic/common/parse_and_pick.rb'
require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'json'

@logger = get_logger()


# Shell out to curl to send +data+ (serialized to JSON) to +url+.
# Returns curl's raw stdout as a String. Currently only used by create_index.
def post(url, verb, data)
  cmd = """
  curl -X#{verb} \
  #{url} \
  -H 'Content-Type: application/json' \
  -d '#{data.to_json}'
  """
  return `#{cmd}`
end

# Create a "jira" index with a single text field.
# NOTE(review): @ELASTIC_URL is never assigned anywhere in this file, so the
# URL would start with "/jira" — presumably set elsewhere or still WIP; this
# function is only referenced from the commented-out block below.
def create_index()
  url = "#{@ELASTIC_URL}/jira"

  return post(url, "PUT", {
    "mappings": {
      "properties": {
        "text": {
          "type": "text"
        }
      }
    }
  })
end

# def do_work(options, data)
#   #puts create_index()

#   url = "#{@ELASTIC_URL}/jira/_doc/1"
#   puts post(url, "POST", {
#     "text": "This is a new issue created in Jira"
#   })
# end


#
# Features
#

# Read the jira datafile (JSON array of issues) and print each issue as
# markdown: header fields, summary, then comments.
# options: hash with :data_filepath (relative to @working_dir).
def do_work(options)
  filepath = File.join(@working_dir, options[:data_filepath])
  issues = JSON.parse(File.read(filepath))
  issues.each do |issue|

    md = []
    md << "# #{issue['key']}"
    md << "project: #{issue['fields']['project']['key']}"
    md << "created: #{issue['fields']['created']}"
    md << "updated: #{issue['fields']['updated']}"
    # optional fields are guarded — absent keys would raise on ['name'] etc.
    md << "status: #{issue['fields']['status']['statusCategory']['name']}" unless issue['fields']["status"].nil?
    md << "priority: #{issue['fields']['priority']['name']}"
    md << "labels: #{issue['fields']['labels'].join(',')}"
    md << "issue_type: #{issue['fields']['issuetype']['name']}" unless issue['fields']["issuetype"].nil?
    md << "assignee: #{issue['fields']['assignee']['displayName']}" unless issue['fields']["assignee"].nil?
    md << ""
    md << "## Summary:"
    md << "#{issue['fields']['summary']}"
    # push to elastic

    md << "## Comments"
    md << ""
    issue["comments"].each_with_index do |comment, i|
      md << "### Comment by #{comment["author"]["displayName"]} "
      md << ""
      md << "created: #{comment["created"]}"
      # {noformat} markers from jira become fenced code blocks in markdown
      md << "#{comment["body"].gsub('{noformat}', "\n```\n")}"
      md << ""
    end
    puts md
  end
end

#
# Options
#

options = {}
global_banner = <<DOC

Jira fetch datafile to markdown

Usage: cl jira to-markdown [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
    options[:data_filepath] = v
  end
end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# prompt for the datafile path if it wasn't supplied as a flag
ask_and_store_option(options, :data_filepath, "data_filepath: ")

# display full command
write_history("""cl jira to-markdown \\
  --data-filepath=#{options[:data_filepath]}
""")

do_work(options)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Jira fetch datafile to markdown
#
# Reads a previously fetched jira datafile (JSON array of issues) and prints
# each issue — plus its comments — as markdown to stdout.
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'
require 'active_support/all'

require 'cl/magic/common/parse_and_pick.rb'
require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'json'

@logger = get_logger()

#
# Features
#

# Render every issue in the datafile as markdown.
# options: hash with :data_filepath (relative to @working_dir).
def do_work(options)
  filepath = File.join(@working_dir, options[:data_filepath])
  issues = JSON.parse(File.read(filepath))
  issues.each do |i|
    # jira_to_markdown returns [issue_markdown, comments]; each comment entry's
    # element [1] is its markdown text (NOTE(review): shape inferred from usage
    # here — confirm against common/jira.rb)
    issue_md, comments = Jira.jira_to_markdown(i)
    puts issue_md
    puts comments.map{ |o| o[1] }.join("\n")
  end
end

#
# Options
#

options = {}
global_banner = <<DOC

Jira fetch datafile to markdown

Usage: cl jira to-markdown [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
    options[:data_filepath] = v
  end
end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# prompt for the datafile path if it wasn't supplied as a flag
ask_and_store_option(options, :data_filepath, "data_filepath: ")

# display full command
write_history("""cl jira to-markdown \\
  --data-filepath=#{options[:data_filepath]}
""")

do_work(options)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
#
|
2
|
+
# Jira fetch datafile to stats
|
3
3
|
require 'optparse'
|
4
4
|
require 'optparse/subcommand'
|
5
5
|
require 'tty-command'
|
@@ -14,25 +14,26 @@ require 'net/http'
|
|
14
14
|
require 'json'
|
15
15
|
|
16
16
|
@logger = get_logger()
|
17
|
-
@cl_cmd_name = File.basename(__FILE__).split('-').join(' ')
|
18
17
|
|
19
18
|
#
|
20
19
|
# Features
|
21
20
|
#
|
22
21
|
|
23
22
|
def get_issues_from_datafile(options)
|
24
|
-
|
23
|
+
final_issues = []
|
25
24
|
filepath = File.join(@working_dir, options[:data_filepath])
|
26
|
-
File.
|
27
|
-
|
25
|
+
issues = JSON.parse(File.read(filepath))
|
26
|
+
|
27
|
+
@logger.info "stats for: #{options[:data_filepath]}"
|
28
|
+
issues.each do |issue|
|
28
29
|
issuetype = issue["fields"]["issuetype"]["name"]
|
29
30
|
labels = issue["fields"]["labels"]
|
30
31
|
|
31
32
|
has_excluded_labels = (labels & options[:exclude_labels]).any?
|
32
33
|
is_excluded_issuetype = options[:exclude_issuetypes].include?(issuetype.downcase)
|
33
|
-
|
34
|
+
final_issues << issue unless has_excluded_labels or is_excluded_issuetype
|
34
35
|
end
|
35
|
-
return
|
36
|
+
return final_issues
|
36
37
|
end
|
37
38
|
|
38
39
|
def in_range_issue_stats(issues, start_date, end_date, options)
|
@@ -137,6 +138,8 @@ end
|
|
137
138
|
def do_work(options)
|
138
139
|
issues = get_issues_from_datafile(options)
|
139
140
|
oldest_date = oldest_issue_date(issues).beginning_of_week
|
141
|
+
@logger.info "starting at #{oldest_date}"
|
142
|
+
|
140
143
|
iter_date_range(oldest_date) do |start_date, end_date|
|
141
144
|
stat_hashes = in_range_issue_stats(issues, start_date, end_date, options)
|
142
145
|
counts = print_stats(stat_hashes, start_date, end_date)
|
@@ -153,9 +156,9 @@ options = {
|
|
153
156
|
}
|
154
157
|
global_banner = <<DOC
|
155
158
|
|
156
|
-
|
159
|
+
Jira fetch datafile to stats
|
157
160
|
|
158
|
-
Usage:
|
161
|
+
Usage: cl jira to-stats [options]
|
159
162
|
|
160
163
|
DOC
|
161
164
|
|
@@ -190,7 +193,7 @@ options[:exclude_issuetypes] = [] if options[:exclude_issuetypes].nil?
|
|
190
193
|
options[:exclude_labels] = [] if options[:exclude_labels].nil?
|
191
194
|
|
192
195
|
# display full command
|
193
|
-
write_history("""
|
196
|
+
write_history("""cl jira to-stats \\
|
194
197
|
--data-filepath=#{options[:data_filepath]} \\
|
195
198
|
--exclude-issuetypes=#{options[:exclude_issuetypes].join(',')} \\
|
196
199
|
--exclude-labels=#{options[:exclude_labels].join(',')}
|
data/lib/cl/magic/cl-kube-logs
CHANGED
@@ -17,8 +17,10 @@ require 'cl/magic/common/kubectl.rb'
|
|
17
17
|
# Features
|
18
18
|
#
|
19
19
|
|
20
|
+
|
20
21
|
def do_work(options, pods, containers)
|
21
|
-
|
22
|
+
container_name_regex = "^(#{containers.collect(&:first).join('|')})$"
|
23
|
+
cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{container_name_regex}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
|
22
24
|
cmd += " | grep #{options[:grep]}" if options[:grep]
|
23
25
|
|
24
26
|
@logger.puts
|
@@ -0,0 +1,169 @@
|
|
1
|
+
|
2
|
+
require 'json'
|
3
|
+
require 'uri'
|
4
|
+
require 'pp'
|
5
|
+
require 'digest'
|
6
|
+
require 'date'
|
7
|
+
|
8
|
+
require 'tty-progressbar'
|
9
|
+
require 'concurrent'
|
10
|
+
|
11
|
+
require 'cl/magic/common/ai_text_splitter.rb'
|
12
|
+
|
13
|
+
|
14
|
+
class AIPrompt
  # Relative API paths on the OpenAI-compatible host named by ENV["OPENAPI_URL"].
  API_COMPLETIONS_PATH = "/openai/v1/chat/completions"
  API_EMBEDDINGS_PATH = "/openai/v1/embeddings"
  MAX_THREADS = 10 # set to 1 to debug without concurrency

  # logger: object responding to #error
  # cache_dir: directory for request/response JSON cache (nil disables caching)
  # max_chunk_size: max characters per chunk sent to the model
  # temperature: stored but not currently sent with any request (NOTE(review))
  def initialize(logger, cache_dir, max_chunk_size=10000, temperature=1)
    @cache_dir = cache_dir
    @logger = logger
    @max_chunk_size = max_chunk_size
    @temperature = temperature
    @ai_text_splitter = AITextSplitter.new(@max_chunk_size, @logger)
    @thread_pool = Concurrent::ThreadPoolExecutor.new(
      min_threads: 0,
      max_threads: MAX_THREADS,
      max_queue: 0,
      fallback_policy: :caller_runs # run on the caller when the pool is saturated
    )
  end

  # Returns the embedding vector (response["data"][0]["embedding"]) for +input+.
  def gen_embeddings(input)
    data = {
      model: "text-embedding-ada-002",
      input: input,
    }
    response = post_open_ai(API_EMBEDDINGS_PATH, data.to_json)
    return response["data"][0]["embedding"]
  end

  # Split +raw_data+ into chunks, prompt the model once per chunk (concurrently),
  # and return an Array with one response string per chunk.
  def prompt(raw_data, prompt, split_as_markdown=false, separator)

    # split
    split_data = @ai_text_splitter.split(raw_data, split_as_markdown, separator)

    # summarize
    responses = summarize_split_text(split_data, prompt, split_as_markdown)

    # map and return
    return responses.collect do |json|
      json["choices"].map {|c| c["message"]["content"]}.join("\n")
    end
  end

  # Delete all cached request/response files.
  def clear_cache()
    Dir.glob(File.join(get_cache_path, '*.json')).each do |file|
      File.delete(file)
    end
  end

  private

  # Run the block on the pool when concurrency is enabled, inline otherwise.
  # (Fix: renamed from the misspelled do_concurently.)
  def do_concurrently
    if MAX_THREADS > 1
      @thread_pool.post do
        yield
      end
    else
      yield
    end
  end

  # Block until all queued work finishes (no-op when running inline).
  def wait_concurrently
    if MAX_THREADS > 1
      @thread_pool.shutdown
      @thread_pool.wait_for_termination
    end
  end

  # For oversized chunks, ask the model to keep its answer small enough
  # (half the input length) that responses can be re-chunked downstream.
  def munge_prompt(text, prompt)
    final_prompt = "#{prompt}"

    if text.length > @max_chunk_size
      half = text.length / 2
      final_prompt = "#{prompt}. Summarize it and keep it under #{half} characters"
    end

    return final_prompt
  end

  # POST each chunk to the completions endpoint; returns raw JSON response hashes.
  def summarize_split_text(split_text, prompt, split_as_markdown)

    bar = TTY::ProgressBar.new("processing #{split_text.count} chunks [:bar]", total: split_text.count)

    json_responses = []
    split_text.each do |text|
      do_concurrently do
        final_prompt = munge_prompt(text, prompt)
        messages = [
          { role: "user", content: final_prompt },
          { role: "user", content: text }
        ]
        # NOTE(review): Array#<< from pool threads is benign under MRI's GVL,
        # but a Queue/Mutex would be safer if this ever runs truly parallel.
        json_responses << post_open_ai(API_COMPLETIONS_PATH, {
          messages: messages
        }.to_json)
        bar.advance
      end
    end

    # wait
    wait_concurrently
    return json_responses
  end

  # Shell out to curl to POST +data+ (a JSON string) to +endpoint+.
  # Logs the raw response and exits the process when the response is malformed
  # (unparseable, or a completions reply without "choices").
  def post_open_ai(endpoint, data)
    # url
    api_url = ENV["OPENAPI_URL"]
    final_url = URI.join(api_url, endpoint)

    # data
    # single quotes are stripped so the payload survives the single-quoted -d below
    sanitized_data = data.gsub("'", "")

    # post
    api_key = ENV["OPENAPI_KEY"]
    cmd = """
    curl -s -X POST \
    '#{final_url}' \
    -H 'Content-Type: application/json' \
    -H 'Authorization: Bearer #{api_key}' \
    -d '#{sanitized_data}'
    """
    response_text = `#{cmd}`
    begin
      timestamp = DateTime.now.strftime("%Y%m%d%H%M%S")
      response_hash = JSON.parse(response_text)

      # completions
      raise if endpoint == API_COMPLETIONS_PATH and not response_hash.key?("choices")

      # cache (shared timestamp pairs the request file with its response file)
      save_to_cache(sanitized_data, timestamp, "request")
      save_to_cache(response_text, timestamp, "response")

      # response
      return response_hash
    rescue => e
      #@logger.error e
      @logger.error response_text
      exit
    end
  end

  # Cache dir lives under @cache_dir/.open_ai_cache (created on demand).
  def get_cache_path
    cache_path = File.join(@cache_dir, ".open_ai_cache")
    Dir.mkdir(cache_path) if !File.directory?(cache_path)
    return cache_path
  end

  # Pretty-print +json_string+ to "<timestamp>_<postfix>.json" in the cache dir.
  # No-op when caching is disabled (@cache_dir is nil).
  # Fix: use the caller-supplied timestamp — previously the time was recomputed
  # here, so a request and its response could land under different filenames.
  def save_to_cache(json_string, timestamp, postfix)
    unless @cache_dir.nil?
      filepath = File.join(get_cache_path, "#{timestamp}_#{postfix}.json")
      File.open(filepath, "w") do |file|
        file.write(JSON.pretty_generate(JSON.parse(json_string)))
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'baran'
|
2
|
+
|
3
|
+
class AITextSplitter

  # max_chunk_size: target size per chunk; @cut_off grants 10% slack before
  # a merge is refused.
  def initialize(max_chunk_size, logger)
    @max_chunk_size = max_chunk_size
    @cut_off = (@max_chunk_size + (@max_chunk_size * 0.1)).floor
    @logger = logger
  end

  # Split +data+ into an Array of chunk strings: markdown-aware when
  # +split_as_markdown+ is set, otherwise by +separator+ (blank-line default)
  # followed by pairwise re-merging toward max_chunk_size.
  def split(data, split_as_markdown=false, separator)
    split_as_markdown ? markdown_to_array(data) : basic_split_then_reduce(data, separator)
  end

  private

  #
  # Separator variants
  #

  # Plain character splitter; small trailing chunks lose context, hence the
  # merge pass on the result.
  def basic_splitter(data, separator)
    sep = (separator.nil? || separator.empty?) ? "\n\n" : separator
    splitter = Baran::CharacterTextSplitter.new(chunk_size: @max_chunk_size, chunk_overlap: 64, separator: sep)
    pieces = splitter.chunks(data).map { |chunk| chunk[:text] }
    reduce_to_max_size(pieces)
  end

  # Preferred strategy: split, then merge neighbours back up toward
  # max_chunk_size so each chunk carries as much context as possible.
  def basic_split_then_reduce(data, separator)
    reduce_to_max_size(basic_splitter(data, separator))
  end

  # Variant that lets the caller hint at split points; kept for reference —
  # it did not work well in practice.
  def recursive_splitter(data, separator)
    separators = ([separator] + ["\n\n"]).compact
    splitter = Baran::RecursiveCharacterTextSplitter.new(
      chunk_size: @max_chunk_size, chunk_overlap: 64,
      separators: separators
    )
    reduce_to_max_size(splitter.chunks(data).map { |chunk| chunk[:text] })
  end

  #
  # Markdown
  #

  # Markdown-aware split; chunk boundaries follow the document structure.
  def markdown_to_array(data)
    Baran::MarkdownSplitter.new.chunks(data).map { |chunk| chunk[:text] }
  end

  # Splitting is separator-driven, so chunks can come out much smaller than
  # the budget. Walk the list and join adjacent pairs whose combined length
  # stays within @cut_off, yielding fewer, fuller chunks.
  def reduce_to_max_size(chunks)
    merged = []
    idx = 0
    while idx < chunks.length
      current = chunks[idx]
      neighbour = chunks[idx + 1]
      if neighbour && (current.length + neighbour.length) <= @cut_off
        merged << "#{current}\n#{neighbour}"
        idx += 2
      else
        merged << current
        idx += 1
      end
    end
    merged
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'json'
require 'shellwords'

# Thin elasticsearch client that shells out to curl.
class Elastic
  # elastic_url: base URL of the cluster (e.g. http://localhost:9200)
  def initialize(elastic_url)
    @elastic_url = elastic_url
  end

  # Fetch documents whose _id is in +ids+ (Array of id strings).
  # Returns curl's raw response body (a JSON string), not a parsed hash.
  def query_by_id(ids)
    url = "/_search"
    verb = "POST"
    data = {
      query: {
        terms: {
          _id: ids
        }
      }
    }
    return post(url, verb, data)
  end

  # Send +data+ (a Hash, serialized to JSON) to "#{@elastic_url}/#{url}" with
  # the given HTTP +verb+. Returns curl's raw response body as a String.
  def post(url, verb, data)
    # NOTE(review): callers pass url both with and without a leading slash,
    # producing an occasional double slash; elasticsearch tolerates it.
    final_url = "#{@elastic_url}/#{url}"

    # Shell-escape the JSON payload so quotes/apostrophes inside document text
    # cannot break out of the command line. (Fix: the previous sanitizer,
    # gsub!(c, "\#{c}"), replaced each quote with the literal text '#{c}' —
    # a broken escape that corrupted the payload and left the command unsafe.)
    payload = Shellwords.escape(data.to_json)

    cmd = "curl -s -X#{verb} " \
          "#{final_url} " \
          "-H 'Content-Type: application/json' " \
          "-d #{payload}"
    return `#{cmd}`
  end

  # Create an index named +elastic_index+ with the given +body+ (mappings etc).
  def create_index(elastic_index, body)
    return post(elastic_index, "PUT", body)
  end
end
|