cl-magic 0.4.0 → 1.2.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
@@ -0,0 +1,112 @@
+ #!/usr/bin/env ruby
+ # Fetch jira issues, status changelogs and save them to a file
+ require 'optparse'
+ require 'optparse/subcommand'
+ require 'tty-command'
+ require 'tty-prompt'
+
+ require 'cl/magic/common/common_options.rb'
+ require 'cl/magic/common/logging.rb'
+ require 'cl/magic/common/jira.rb'
+
+ require 'net/http'
+ require 'json'
+
+ @logger = get_logger()
+
+ #
+ # Features
+ #
+
+ def do_work(options)
+   break_at_one_page = false # when developing, set this to true
+   jira = Jira.new options[:base_uri], options[:username], options[:token], break_at_one_page
+
+   @logger.puts ""
+   @logger.wait "fetch epics"
+   epic_ids, epics = jira.get_epic_ids(options[:project], options[:epic_wildcard])
+
+   @logger.puts ""
+   @logger.wait "fetch issues"
+   issues = jira.get_issues_by_epic_ids(options[:project], epic_ids)
+
+   @logger.puts ""
+   @logger.wait "fetch change logs"
+   issues = jira.collect_status_changelogs(jira, issues)
+
+   @logger.puts ""
+   @logger.wait "fetch comments"
+   issues = jira.collect_comments(jira, issues)
+
+   @logger.puts ""
+   puts issues.to_json
+ end
+
+ #
+ # Options
+ #
+
+ options = {}
+ global_banner = <<DOC
+
+ Fetch jira issues, status changelogs and save them to a file
+
+ Usage: cl jira fetch-by-epics [options]
+
+ DOC
+
+ global = OptionParser.new do |g|
+   g.banner = global_banner
+   add_help_and_verbose(g)
+
+   g.on("--base-uri URI", "base uri for jira (ex. https://company.atlassian.net)") do |v|
+     options[:base_uri] = v
+   end
+
+   g.on("-u", "--username USERNAME", "jira username") do |v|
+     options[:username] = v
+   end
+
+   g.on("-t", "--token TOKEN", "jira token (you can create one, google it)") do |v|
+     options[:token] = v
+   end
+
+   g.on("-p", "--project KEY", "jira project to fetch from") do |v|
+     options[:project] = v
+   end
+
+   g.on("-w", "--epic-wildcard TEXT", "wildcard to filter the epics by") do |v|
+     options[:epic_wildcard] = v
+   end
+
+ end
+
+ #
+ # Run
+ #
+
+ @working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
+ global.parse(ARGV)
+
+ # error on token right away
+ if options[:token].nil?
+   @logger.error "missing --token"
+   exit
+ end
+
+ # prompt for missing options
+ ask_and_store_option(options, :base_uri, "base_uri: ")
+ ask_and_store_option(options, :username, "username: ")
+ ask_and_store_option(options, :project, "project: ")
+ ask_and_store_option(options, :epic_wildcard, "epic_wildcard: ")
+
+ # display full command
+ write_history("""cl jira fetch-by-epics \\
+ --base-uri=#{options[:base_uri]} \\
+ --username=#{options[:username]} \\
+ --project=#{options[:project]} \\
+ --epic-wildcard=#{options[:epic_wildcard]} \\
+ --token REDACTED
+ """)
+
+ do_work(options)
@@ -0,0 +1,126 @@
+ #!/usr/bin/env ruby
+ # Jira fetch datafile to markdown
+ require 'optparse'
+ require 'optparse/subcommand'
+ require 'tty-command'
+ require 'tty-prompt'
+ require 'active_support/all'
+
+ require 'cl/magic/common/parse_and_pick.rb'
+ require 'cl/magic/common/common_options.rb'
+ require 'cl/magic/common/logging.rb'
+ require 'cl/magic/common/jira.rb'
+
+ require 'json'
+
+ @logger = get_logger()
+
+
+ def post(url, verb, data)
+   cmd = """
+ curl -X#{verb} \
+ #{url} \
+ -H 'Content-Type: application/json' \
+ -d '#{data.to_json}'
+ """
+   return `#{cmd}`
+ end
+
+ def create_index()
+   url = "#{@ELASTIC_URL}/jira"
+
+   return post(url, "PUT", {
+     "mappings": {
+       "properties": {
+         "text": {
+           "type": "text"
+         }
+       }
+     }
+   })
+ end
+
+ # def do_work(options, data)
+ #   #puts create_index()
+
+ #   url = "#{@ELASTIC_URL}/jira/_doc/1"
+ #   puts post(url, "POST", {
+ #     "text": "This is a new issue created in Jira"
+ #   })
+ # end
+
+
+ #
+ # Features
+ #
+
+ def do_work(options)
+   filepath = File.join(@working_dir, options[:data_filepath])
+   issues = JSON.parse(File.read(filepath))
+   issues.each do |issue|
+
+     md = []
+     md << "# #{issue['key']}"
+     md << "project: #{issue['fields']['project']['key']}"
+     md << "created: #{issue['fields']['created']}"
+     md << "updated: #{issue['fields']['updated']}"
+     md << "status: #{issue['fields']['status']['statusCategory']['name']}" unless issue['fields']["status"].nil?
+     md << "priority: #{issue['fields']['priority']['name']}"
+     md << "labels: #{issue['fields']['labels'].join(',')}"
+     md << "issue_type: #{issue['fields']['issuetype']['name']}" unless issue['fields']["issuetype"].nil?
+     md << "assignee: #{issue['fields']['assignee']['displayName']}" unless issue['fields']["assignee"].nil?
+     md << ""
+     md << "## Summary:"
+     md << "#{issue['fields']['summary']}"
+     # push to elastic
+
+     md << "## Comments"
+     md << ""
+     issue["comments"].each_with_index do |comment, i|
+       md << "### Comment by #{comment["author"]["displayName"]} "
+       md << ""
+       md << "created: #{comment["created"]}"
+       md << "#{comment["body"].gsub('{noformat}', "\n```\n")}"
+       md << ""
+     end
+     puts md
+   end
+ end
+
+ #
+ # Options
+ #
+
+ options = {}
+ global_banner = <<DOC
+
+ Jira fetch datafile to markdown
+
+ Usage: cl jira to-markdown [options]
+
+ DOC
+
+ global = OptionParser.new do |g|
+   g.banner = global_banner
+   add_help_and_verbose(g)
+
+   g.on("-f", "--data-filepath FILEPATH", "relative path to jira datafile") do |v|
+     options[:data_filepath] = v
+   end
+ end
+
+ #
+ # Run
+ #
+
+ @working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
+ global.parse(ARGV)
+
+ ask_and_store_option(options, :data_filepath, "data_filepath: ")
+
+ # display full command
+ write_history("""cl jira to-markdown \\
+ --data-filepath=#{options[:data_filepath]}
+ """)
+
+ do_work(options)
@@ -0,0 +1,68 @@
+ #!/usr/bin/env ruby
+ # Jira fetch datafile to markdown
+ require 'optparse'
+ require 'optparse/subcommand'
+ require 'tty-command'
+ require 'tty-prompt'
+ require 'active_support/all'
+
+ require 'cl/magic/common/parse_and_pick.rb'
+ require 'cl/magic/common/common_options.rb'
+ require 'cl/magic/common/logging.rb'
+ require 'cl/magic/common/jira.rb'
+
+ require 'json'
+
+ @logger = get_logger()
+
+ #
+ # Features
+ #
+
+ def do_work(options)
+   filepath = File.join(@working_dir, options[:data_filepath])
+   issues = JSON.parse(File.read(filepath))
+   issues.each do |i|
+     issue_md, comments = Jira.jira_to_markdown(i)
+     puts issue_md
+     puts comments.map{ |o| o[1] }.join("\n")
+   end
+ end
+
+ #
+ # Options
+ #
+
+ options = {}
+ global_banner = <<DOC
+
+ Jira fetch datafile to markdown
+
+ Usage: cl jira to-markdown [options]
+
+ DOC
+
+ global = OptionParser.new do |g|
+   g.banner = global_banner
+   add_help_and_verbose(g)
+
+   g.on("-f", "--data-filepath FILEPATH", "relative path to jira datafile") do |v|
+     options[:data_filepath] = v
+   end
+ end
+
+ #
+ # Run
+ #
+
+ @working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
+ global.parse(ARGV)
+
+ ask_and_store_option(options, :data_filepath, "data_filepath: ")
+
+ # display full command
+ write_history("""cl jira to-markdown \\
+ --data-filepath=#{options[:data_filepath]}
+ """)
+
+ do_work(options)
@@ -1,5 +1,5 @@
  #!/usr/bin/env ruby
- # Fetch jira issues print stats
+ # Jira fetch datafile to stats
  require 'optparse'
  require 'optparse/subcommand'
  require 'tty-command'
@@ -14,25 +14,26 @@ require 'net/http'
  require 'json'

  @logger = get_logger()
- @cl_cmd_name = File.basename(__FILE__).split('-').join(' ')

  #
  # Features
  #

  def get_issues_from_datafile(options)
-   issues = []
+   final_issues = []
    filepath = File.join(@working_dir, options[:data_filepath])
-   File.foreach(filepath) do |line|
-     issue = JSON.parse(line)
+   issues = JSON.parse(File.read(filepath))
+
+   @logger.info "stats for: #{options[:data_filepath]}"
+   issues.each do |issue|
      issuetype = issue["fields"]["issuetype"]["name"]
      labels = issue["fields"]["labels"]

      has_excluded_labels = (labels & options[:exclude_labels]).any?
      is_excluded_issuetype = options[:exclude_issuetypes].include?(issuetype.downcase)
-     issues << issue unless has_excluded_labels or is_excluded_issuetype
+     final_issues << issue unless has_excluded_labels or is_excluded_issuetype
    end
-   return issues
+   return final_issues
  end

  def in_range_issue_stats(issues, start_date, end_date, options)
@@ -137,6 +138,8 @@ end
  def do_work(options)
    issues = get_issues_from_datafile(options)
    oldest_date = oldest_issue_date(issues).beginning_of_week
+   @logger.info "starting at #{oldest_date}"
+
    iter_date_range(oldest_date) do |start_date, end_date|
      stat_hashes = in_range_issue_stats(issues, start_date, end_date, options)
      counts = print_stats(stat_hashes, start_date, end_date)
@@ -153,9 +156,9 @@ options = {
  }
  global_banner = <<DOC

- Process jira fetch file an return stats
+ Jira fetch datafile to stats

- Usage: #{@cl_cmd_name} [options]
+ Usage: cl jira to-stats [options]

  DOC

@@ -190,7 +193,7 @@ options[:exclude_issuetypes] = [] if options[:exclude_issuetypes].nil?
  options[:exclude_labels] = [] if options[:exclude_labels].nil?

  # display full command
- write_history("""#{@cl_cmd_name} \\
+ write_history("""cl jira to-stats \\
  --data-filepath=#{options[:data_filepath]} \\
  --exclude-issuetypes=#{options[:exclude_issuetypes].join(',')} \\
  --exclude-labels=#{options[:exclude_labels].join(',')}
@@ -17,8 +17,10 @@ require 'cl/magic/common/kubectl.rb'
  # Features
  #

+
  def do_work(options, pods, containers)
-   cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{containers.collect(&:first).join('|')}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
+   container_name_regex = "^(#{containers.collect(&:first).join('|')})$"
+   cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{container_name_regex}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
    cmd += " | grep #{options[:grep]}" if options[:grep]

    @logger.puts
@@ -0,0 +1,169 @@
+
+ require 'json'
+ require 'uri'
+ require 'pp'
+ require 'digest'
+ require 'date'
+
+ require 'tty-progressbar'
+ require 'concurrent'
+
+ require 'cl/magic/common/ai_text_splitter.rb'
+
+
+ class AIPrompt
+   API_COMPLETIONS_PATH = "/openai/v1/chat/completions"
+   API_EMBEDDINGS_PATH = "/openai/v1/embeddings"
+   MAX_THREADS = 10 # set to 1 to debug without concurrency
+
+   def initialize(logger, cache_dir, max_chunk_size=10000, temperature=1)
+     @cache_dir = cache_dir
+     @logger = logger
+     @max_chunk_size = max_chunk_size
+     @temperature = temperature
+     @ai_text_splitter = AITextSplitter.new(@max_chunk_size, @logger)
+     @thread_pool = Concurrent::ThreadPoolExecutor.new(
+       min_threads: 0,
+       max_threads: MAX_THREADS,
+       max_queue: 0,
+       fallback_policy: :caller_runs
+     )
+   end
+
+   def gen_embeddings(input)
+     data = {
+       model: "text-embedding-ada-002",
+       input: input,
+     }
+     response = post_open_ai(API_EMBEDDINGS_PATH, data.to_json)
+     return response["data"][0]["embedding"]
+   end
+
+   def prompt(raw_data, prompt, split_as_markdown=false, separator)
+
+     # split
+     split_data = @ai_text_splitter.split(raw_data, split_as_markdown, separator)
+
+     # summarize
+     responses = summarize_split_text(split_data, prompt, split_as_markdown)
+
+     # map and return
+     return responses.collect do |json|
+       json["choices"].map {|c| c["message"]["content"]}.join("\n")
+     end
+   end
+
+   def clear_cache()
+     Dir.glob(File.join(get_cache_path, '*.json')).each do |file|
+       File.delete(file)
+     end
+   end
+
+   private
+
+   def do_concurrently
+     if MAX_THREADS > 1
+       @thread_pool.post do
+         yield
+       end
+     else
+       yield
+     end
+   end
+
+   def wait_concurrently
+     if MAX_THREADS > 1
+       @thread_pool.shutdown
+       @thread_pool.wait_for_termination
+     end
+   end
+
+   def munge_prompt(text, prompt)
+     final_prompt = "#{prompt}"
+
+     if text.length > @max_chunk_size
+       half = text.length / 2
+       final_prompt = "#{prompt}. Summarize it and keep it under #{half} characters"
+     end
+
+     return final_prompt
+   end
+
+   def summarize_split_text(split_text, prompt, split_as_markdown)
+
+     bar = TTY::ProgressBar.new("processing #{split_text.count} chunks [:bar]", total: split_text.count)
+
+     json_responses = []
+     split_text.each do |text|
+       do_concurrently do
+         final_prompt = munge_prompt(text, prompt)
+         messages = [
+           { role: "user", content: final_prompt },
+           { role: "user", content: text }
+         ]
+         json_responses << post_open_ai(API_COMPLETIONS_PATH, {
+           messages: messages
+         }.to_json)
+         bar.advance
+       end
+     end
+
+     # wait
+     wait_concurrently
+     return json_responses
+   end
+
+   def post_open_ai(endpoint, data)
+     # url
+     api_url = ENV["OPENAPI_URL"]
+     final_url = URI.join(api_url, endpoint)
+
+     # data
+     sanitized_data = data.gsub("'", "")
+
+     # post
+     api_key = ENV["OPENAPI_KEY"]
+     cmd = """
+ curl -s -X POST \
+ '#{final_url}' \
+ -H 'Content-Type: application/json' \
+ -H 'Authorization: Bearer #{api_key}' \
+ -d '#{sanitized_data}'
+ """
+     response_text = `#{cmd}`
+     begin
+       timestamp = DateTime.now.strftime("%Y%m%d%H%M%S")
+       response_hash = JSON.parse(response_text)
+
+       # completions
+       raise if endpoint == API_COMPLETIONS_PATH and not response_hash.key?("choices")
+
+       # cache
+       save_to_cache(sanitized_data, timestamp, "request")
+       save_to_cache(response_text, timestamp, "response")
+
+       # response
+       return response_hash
+     rescue => e
+       #@logger.error e
+       @logger.error response_text
+       exit
+     end
+   end
+
+   def get_cache_path
+     cache_path = File.join(@cache_dir, ".open_ai_cache")
+     Dir.mkdir(cache_path) if !File.directory?(cache_path)
+     return cache_path
+   end
+
+   def save_to_cache(json_string, timestamp, postfix)
+     unless @cache_dir.nil?
+       current_datetime = DateTime.now.strftime("%Y%m%d%H%M%S")
+       filepath = File.join(get_cache_path, "#{current_datetime}_#{postfix}.json")
+       File.open(filepath, "w") do |file|
+         file.write(JSON.pretty_generate(JSON.parse(json_string)))
+       end
+     end
+   end
+ end
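
The AIPrompt class above shells out to curl against an OpenAI-compatible endpoint taken from the OPENAPI_URL and OPENAPI_KEY environment variables, splitting the input with AITextSplitter and fanning the chunks out over a thread pool. A minimal usage sketch follows; the require path, logger, and cache directory are assumptions, not part of the diff:

    # hypothetical illustration, not part of the released files above
    require 'logger'
    require 'cl/magic/common/ai_prompt.rb'   # assumed require path for this new file

    ai = AIPrompt.new(Logger.new($stdout), "/tmp")   # cache is written under /tmp/.open_ai_cache
    summaries = ai.prompt(File.read("notes.md"), "Summarize this text", false, "\n\n")
    puts summaries.join("\n")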
@@ -0,0 +1,78 @@
+ require 'baran'
+
+ class AITextSplitter
+
+   def initialize(max_chunk_size, logger)
+     @max_chunk_size = max_chunk_size
+     @cut_off = (@max_chunk_size + (@max_chunk_size * 0.1)).floor
+     @logger = logger
+   end
+
+   def split(data, split_as_markdown=false, separator)
+     return markdown_to_array(data) if split_as_markdown
+     return basic_split_then_reduce(data, separator)
+   end
+
+   private
+
+   #
+   # Separator Variants
+   #
+
+   # basic splitter, would lose context when splits got too small
+   def basic_splitter(data, separator)
+     separator = "\n\n" if separator.nil? or separator.empty?
+     splitter = Baran::CharacterTextSplitter.new(chunk_size: @max_chunk_size, chunk_overlap: 64, separator: separator)
+     chunks = splitter.chunks(data).collect {|c| c[:text]}
+     return reduce_to_max_size(chunks)
+   end
+
+   # Preferred: provides even better context by insisting on splits near max_chunk_size
+   def basic_split_then_reduce(data, separator)
+     chunks = basic_splitter(data, separator)
+     return reduce_to_max_size(chunks)
+   end
+
+   # User can hint at split points; it didn't work great
+   def recursive_splitter(data, separator)
+     separator = ([separator] + ["\n\n"]).compact
+     splitter = Baran::RecursiveCharacterTextSplitter.new(
+       chunk_size: @max_chunk_size, chunk_overlap: 64,
+       separators: separator
+     )
+     chunks = splitter.chunks(data).collect {|c| c[:text]}
+     return reduce_to_max_size(chunks)
+   end
+
+   #
+   # Markdown
+   #
+
+   def markdown_to_array(data)
+     splitter = Baran::MarkdownSplitter.new()
+     return splitter.chunks(data).collect {|c| c[:text]}
+   end
+
+   #
+   # Splitting is done by separator and the LLM can respond
+   # with content of any length. Let's reduce the chunks by
+   # combining smaller responses up to @max_chunk_size
+   #
+
+   def reduce_to_max_size(chunks)
+     combined = []
+     i = 0
+     while i < chunks.length
+       c = chunks[i]
+       n = chunks[i + 1]
+       unless n.nil? or (c.length + n.length) > @cut_off
+         combined << [c, n].join("\n")
+         i += 2
+       else
+         combined << c
+         i += 1
+       end
+     end
+     combined
+   end
+ end
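
AITextSplitter wraps the baran gem: markdown input goes through Baran::MarkdownSplitter, everything else is split on a separator and then recombined up to roughly max_chunk_size plus 10%. A small sketch of driving it directly; the require path and input file are assumptions, not part of the diff:

    # hypothetical illustration, not part of the released files above
    require 'cl/magic/common/ai_text_splitter.rb'   # assumed require path for this new file

    splitter = AITextSplitter.new(10_000, nil)      # logger is stored but not used on this path
    chunks = splitter.split(File.read("report.txt"), false, "\n\n")
    chunks.each { |c| puts c.length }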
@@ -17,7 +17,7 @@ def add_help(opts)
  end

  def ask_and_store_option(options, key, question)
-   if options[key].nil?
+   if options[key].nil? or options[key].empty?
      options[key] = TTY::Prompt.new.ask(question)
    end
  end
@@ -0,0 +1,41 @@
+ class Elastic
+   def initialize(elastic_url)
+     @elastic_url = elastic_url
+   end
+
+   def query_by_id(ids)
+     url = "/_search"
+     verb = "POST"
+     data = {
+       query: {
+         terms: {
+           _id: ids
+         }
+       }
+     }
+     sanitized_data = data.to_json
+     return post(url, verb, data)
+   end
+
+   def post(url, verb, data)
+     final_url = "#{@elastic_url}/#{url}"
+
+     # sanitize
+     sanitized_data = data.to_json
+     ["'", "’"].each { |c| sanitized_data.gsub!(c, "\#{c}") }
+
+     # post
+     cmd = """
+ curl -s -X#{verb} \
+ #{final_url} \
+ -H 'Content-Type: application/json' \
+ -d '#{sanitized_data}'
+ """
+     return `#{cmd}`
+   end
+
+   def create_index(elastic_index, body)
+     url = "#{elastic_index}"
+     return post(url, "PUT", body)
+   end
+ end
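
The Elastic helper builds its requests with curl as well; a short usage sketch with a placeholder URL and index name (assumptions, not from the diff):

    # hypothetical illustration, not part of the released files above
    require 'cl/magic/common/elastic.rb'   # assumed require path for this new file

    es = Elastic.new("http://localhost:9200")
    es.create_index("jira", { mappings: { properties: { text: { type: "text" } } } })
    puts es.query_by_id(["1", "2"])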