cl-magic 0.4.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+ # Fetch jira issues, status changelogs and save them to a file
3
+ require 'optparse'
4
+ require 'optparse/subcommand'
5
+ require 'tty-command'
6
+ require 'tty-prompt'
7
+
8
+ require 'cl/magic/common/common_options.rb'
9
+ require 'cl/magic/common/logging.rb'
10
+ require 'cl/magic/common/jira.rb'
11
+
12
+ require 'net/http'
13
+ require 'json'
14
+
15
+ @logger = get_logger()
16
+
17
+ #
18
+ # Features
19
+ #
20
+
21
+ def do_work(options)
22
+ break_at_one_page = false # when developing, set this to true
23
+ jira = Jira.new options[:base_uri], options[:username], options[:token], break_at_one_page
24
+
25
+ @logger.puts ""
26
+ @logger.wait "fetch epics"
27
+ epic_ids, epics = jira.get_epic_ids(options[:project], options[:epic_wildcard])
28
+
29
+ @logger.puts ""
30
+ @logger.wait "fetch issues"
31
+ issues = jira.get_issues_by_epic_ids(options[:project], epic_ids)
32
+
33
+ @logger.puts ""
34
+ @logger.wait "fetch change logs"
35
+ issues = jira.collect_status_changelogs(jira, issues)
36
+
37
+ @logger.puts ""
38
+ @logger.wait "fetch comments"
39
+ issues = jira.collect_comments(jira, issues)
40
+
41
+ @logger.puts ""
42
+ puts issues.to_json
43
+ end
44
+
45
+ #
46
+ # Options
47
+ #
48
+
49
+ options = {}
50
+ global_banner = <<DOC
51
+
52
+ Fetch jira issues, status changelogs and save them to a file
53
+
54
+ Usage: cl jira fetch-by-epics [options]
55
+
56
+ DOC
57
+
58
+ global = OptionParser.new do |g|
59
+ g.banner = global_banner
60
+ add_help_and_verbose(g)
61
+
62
+ g.on("--base-uri URI", "base uri for jira (ex. https://company.atlassian.net)") do |v|
63
+ options[:base_uri] = v
64
+ end
65
+
66
+ g.on("-u", "--username USERNAME", "jira username") do |v|
67
+ options[:username] = v
68
+ end
69
+
70
+ g.on("-t", "--token TOKEN", "jira token (you can create one, google it)") do |v|
71
+ options[:token] = v
72
+ end
73
+
74
+ g.on("-p", "--project KEY", "jira project to fetch from") do |v|
75
+ options[:project] = v
76
+ end
77
+
78
+ g.on("-w", "--epic-wildcard TEXT", "wildcard to filter the epics by") do |v|
79
+ options[:epic_wildcard] = v
80
+ end
81
+
82
+ end
83
+
84
+ #
85
+ # Run
86
+ #
87
+
88
+ @working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
89
+ global.parse(ARGV)
90
+
91
+ # error on token right away
92
+ if options[:token].nil?
93
+ @logger.error "missing --token"
94
+ exit
95
+ end
96
+
97
+ # prompt for missing options
98
+ ask_and_store_option(options, :base_uri, "base_uri: ")
99
+ ask_and_store_option(options, :username, "username: ")
100
+ ask_and_store_option(options, :project, "project: ")
101
+ ask_and_store_option(options, :epic_wildcard, "epic_wildcard: ")
102
+
103
+ # display full command
104
+ write_history("""cl jira fetch-by-epics \\
105
+ --base-uri=#{options[:base_uri]} \\
106
+ --username=#{options[:username]} \\
107
+ --project=#{options[:project]} \\
108
+ --epic-wildcard=#{options[:epic_wildcard]} \\
109
+ --token REDACTED
110
+ """)
111
+
112
+ do_work(options)
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env ruby
2
+ # Jira fetch datafile to markdown
3
+ require 'optparse'
4
+ require 'optparse/subcommand'
5
+ require 'tty-command'
6
+ require 'tty-prompt'
7
+ require 'active_support/all'
8
+
9
+ require 'cl/magic/common/parse_and_pick.rb'
10
+ require 'cl/magic/common/common_options.rb'
11
+ require 'cl/magic/common/logging.rb'
12
+ require 'cl/magic/common/jira.rb'
13
+
14
+ require 'json'
15
+
16
+ @logger = get_logger()
17
+
18
+
19
+ def post(url, verb, data)
20
+ cmd = """
21
+ curl -X#{verb} \
22
+ #{url} \
23
+ -H 'Content-Type: application/json' \
24
+ -d '#{data.to_json}'
25
+ """
26
+ return `#{cmd}`
27
+ end
28
+
29
+ def create_index()
30
+ url = "#{@ELASTIC_URL}/jira"
31
+
32
+ return post(url, "PUT", {
33
+ "mappings": {
34
+ "properties": {
35
+ "text": {
36
+ "type": "text"
37
+ }
38
+ }
39
+ }
40
+ })
41
+ end
42
+
43
+ # def do_work(options, data)
44
+ # #puts create_index()
45
+
46
+ # url = "#{@ELASTIC_URL}/jira/_doc/1"
47
+ # puts post(url, "POST", {
48
+ # "text": "This is a new issue created in Jira"
49
+ # })
50
+ # end
51
+
52
+
53
+ #
54
+ # Features
55
+ #
56
+
57
+ def do_work(options)
58
+ filepath = File.join(@working_dir, options[:data_filepath])
59
+ issues = JSON.parse(File.read(filepath))
60
+ issues.each do |issue|
61
+
62
+ md = []
63
+ md << "# #{issue['key']}"
64
+ md << "project: #{issue['fields']['project']['key']}"
65
+ md << "created: #{issue['fields']['created']}"
66
+ md << "updated: #{issue['fields']['updated']}"
67
+ md << "status: #{issue['fields']['status']['statusCategory']['name']}" unless issue['fields']["status"].nil?
68
+ md << "priority: #{issue['fields']['priority']['name']}"
69
+ md << "labels: #{issue['fields']['labels'].join(',')}"
70
+ md << "issue_type: #{issue['fields']['issuetype']['name']}" unless issue['fields']["issuetype"].nil?
71
+ md << "assignee: #{issue['fields']['assignee']['displayName']}" unless issue['fields']["assignee"].nil?
72
+ md << ""
73
+ md << "## Summary:"
74
+ md << "#{issue['fields']['summary']}"
75
+ # push to elastic
76
+
77
+ md << "## Comments"
78
+ md << ""
79
+ issue["comments"].each_with_index do |comment, i|
80
+ md << "### Comment by #{comment["author"]["displayName"]} "
81
+ md << ""
82
+ md << "created: #{comment["created"]}"
83
+ md << "#{comment["body"].gsub('{noformat}', "\n```\n")}"
84
+ md << ""
85
+ end
86
+ puts md
87
+ end
88
+ end
89
+
90
+ #
91
+ # Options
92
+ #
93
+
94
+ options = {}
95
+ global_banner = <<DOC
96
+
97
+ Jira fetch datafile to markdown
98
+
99
+ Usage: cl jira to-markdown [options]
100
+
101
+ DOC
102
+
103
+ global = OptionParser.new do |g|
104
+ g.banner = global_banner
105
+ add_help_and_verbose(g)
106
+
107
+ g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
108
+ options[:data_filepath] = v
109
+ end
110
+ end
111
+
112
+ #
113
+ # Run
114
+ #
115
+
116
+ @working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
117
+ global.parse(ARGV)
118
+
119
+ ask_and_store_option(options, :data_filepath, "data_filepath: ")
120
+
121
+ # display full command
122
+ write_history("""cl jira to-markdown \\
123
+ --data-filepath=#{options[:data_filepath]}
124
+ """)
125
+
126
+ do_work(options)
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env ruby
2
+ # Jira fetch datafile to markdown
3
+ require 'optparse'
4
+ require 'optparse/subcommand'
5
+ require 'tty-command'
6
+ require 'tty-prompt'
7
+ require 'active_support/all'
8
+
9
+ require 'cl/magic/common/parse_and_pick.rb'
10
+ require 'cl/magic/common/common_options.rb'
11
+ require 'cl/magic/common/logging.rb'
12
+ require 'cl/magic/common/jira.rb'
13
+
14
+ require 'json'
15
+
16
+ @logger = get_logger()
17
+
18
+ #
19
+ # Features
20
+ #
21
+
22
+ def do_work(options)
23
+ filepath = File.join(@working_dir, options[:data_filepath])
24
+ issues = JSON.parse(File.read(filepath))
25
+ issues.each do |i|
26
+ issue_md, comments = Jira.jira_to_markdown(i)
27
+ puts issue_md
28
+ puts comments.map{ |o| o[1] }.join("\n")
29
+ end
30
+ end
31
+
32
+ #
33
+ # Options
34
+ #
35
+
36
+ options = {}
37
+ global_banner = <<DOC
38
+
39
+ Jira fetch datafile to markdown
40
+
41
+ Usage: cl jira to-markdown [options]
42
+
43
+ DOC
44
+
45
+ global = OptionParser.new do |g|
46
+ g.banner = global_banner
47
+ add_help_and_verbose(g)
48
+
49
+ g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
50
+ options[:data_filepath] = v
51
+ end
52
+ end
53
+
54
+ #
55
+ # Run
56
+ #
57
+
58
+ @working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
59
+ global.parse(ARGV)
60
+
61
+ ask_and_store_option(options, :data_filepath, "data_filepath: ")
62
+
63
+ # display full command
64
+ write_history("""cl jira to-markdown \\
65
+ --data-filepath=#{options[:data_filepath]}
66
+ """)
67
+
68
+ do_work(options)
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
- # Fetch jira issues print stats
2
+ # Jira fetch datafile to stats
3
3
  require 'optparse'
4
4
  require 'optparse/subcommand'
5
5
  require 'tty-command'
@@ -14,25 +14,26 @@ require 'net/http'
14
14
  require 'json'
15
15
 
16
16
  @logger = get_logger()
17
- @cl_cmd_name = File.basename(__FILE__).split('-').join(' ')
18
17
 
19
18
  #
20
19
  # Features
21
20
  #
22
21
 
23
22
  def get_issues_from_datafile(options)
24
- issues = []
23
+ final_issues = []
25
24
  filepath = File.join(@working_dir, options[:data_filepath])
26
- File.foreach(filepath) do |line|
27
- issue = JSON.parse(line)
25
+ issues = JSON.parse(File.read(filepath))
26
+
27
+ @logger.info "stats for: #{options[:data_filepath]}"
28
+ issues.each do |issue|
28
29
  issuetype = issue["fields"]["issuetype"]["name"]
29
30
  labels = issue["fields"]["labels"]
30
31
 
31
32
  has_excluded_labels = (labels & options[:exclude_labels]).any?
32
33
  is_excluded_issuetype = options[:exclude_issuetypes].include?(issuetype.downcase)
33
- issues << issue unless has_excluded_labels or is_excluded_issuetype
34
+ final_issues << issue unless has_excluded_labels or is_excluded_issuetype
34
35
  end
35
- return issues
36
+ return final_issues
36
37
  end
37
38
 
38
39
  def in_range_issue_stats(issues, start_date, end_date, options)
@@ -137,6 +138,8 @@ end
137
138
  def do_work(options)
138
139
  issues = get_issues_from_datafile(options)
139
140
  oldest_date = oldest_issue_date(issues).beginning_of_week
141
+ @logger.info "starting at #{oldest_date}"
142
+
140
143
  iter_date_range(oldest_date) do |start_date, end_date|
141
144
  stat_hashes = in_range_issue_stats(issues, start_date, end_date, options)
142
145
  counts = print_stats(stat_hashes, start_date, end_date)
@@ -153,9 +156,9 @@ options = {
153
156
  }
154
157
  global_banner = <<DOC
155
158
 
156
- Process jira fetch file an return stats
159
+ Jira fetch datafile to stats
157
160
 
158
- Usage: #{@cl_cmd_name} [options]
161
+ Usage: cl jira to-stats [options]
159
162
 
160
163
  DOC
161
164
 
@@ -190,7 +193,7 @@ options[:exclude_issuetypes] = [] if options[:exclude_issuetypes].nil?
190
193
  options[:exclude_labels] = [] if options[:exclude_labels].nil?
191
194
 
192
195
  # display full command
193
- write_history("""#{@cl_cmd_name} \\
196
+ write_history("""cl jira to-stats \\
194
197
  --data-filepath=#{options[:data_filepath]} \\
195
198
  --exclude-issuetypes=#{options[:exclude_issuetypes].join(',')} \\
196
199
  --exclude-labels=#{options[:exclude_labels].join(',')}
@@ -17,8 +17,10 @@ require 'cl/magic/common/kubectl.rb'
17
17
  # Features
18
18
  #
19
19
 
20
+
20
21
  def do_work(options, pods, containers)
21
- cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{containers.collect(&:first).join('|')}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
22
+ container_name_regex = "^(#{containers.collect(&:first).join('|')})$"
23
+ cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{container_name_regex}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
22
24
  cmd += " | grep #{options[:grep]}" if options[:grep]
23
25
 
24
26
  @logger.puts
@@ -0,0 +1,169 @@
1
+
2
+ require 'json'
3
+ require 'uri'
4
+ require 'pp'
5
+ require 'digest'
6
+ require 'date'
7
+
8
+ require 'tty-progressbar'
9
+ require 'concurrent'
10
+
11
+ require 'cl/magic/common/ai_text_splitter.rb'
12
+
13
+
14
+ class AIPrompt
15
+ API_COMPLETIONS_PATH = "/openai/v1/chat/completions"
16
+ API_EMBEDDINGS_PATH = "/openai/v1/embeddings"
17
+ MAX_THREADS = 10 # set to 1 to debug without concurrency
18
+
19
+ def initialize(logger, cache_dir, max_chunk_size=10000, temperature=1)
20
+ @cache_dir = cache_dir
21
+ @logger = logger
22
+ @max_chunk_size = max_chunk_size
23
+ @temperature = temperature
24
+ @ai_text_splitter = AITextSplitter.new(@max_chunk_size, @logger)
25
+ @thread_pool = Concurrent::ThreadPoolExecutor.new(
26
+ min_threads: 0,
27
+ max_threads: MAX_THREADS,
28
+ max_queue: 0,
29
+ fallback_policy: :caller_runs
30
+ )
31
+ end
32
+
33
+ def gen_embeddings(input)
34
+ data = {
35
+ model: "text-embedding-ada-002",
36
+ input: input,
37
+ }
38
+ response = post_open_ai(API_EMBEDDINGS_PATH, data.to_json)
39
+ return response["data"][0]["embedding"]
40
+ end
41
+
42
+ def prompt(raw_data, prompt, split_as_markdown=false, separator)
43
+
44
+ # split
45
+ split_data = @ai_text_splitter.split(raw_data, split_as_markdown, separator)
46
+
47
+ # summarize
48
+ responses = summarize_split_text(split_data, prompt, split_as_markdown)
49
+
50
+ # map and return
51
+ return responses.collect do |json|
52
+ json["choices"].map {|c| c["message"]["content"]}.join("\n")
53
+ end
54
+ end
55
+
56
+ def clear_cache()
57
+ Dir.glob(File.join(get_cache_path, '*.json')).each do |file|
58
+ File.delete(file)
59
+ end
60
+ end
61
+
62
+ private
63
+
64
+ def do_concurently
65
+ if MAX_THREADS > 1
66
+ @thread_pool.post do
67
+ yield
68
+ end
69
+ else
70
+ yield
71
+ end
72
+ end
73
+
74
+ def wait_concurrently
75
+ if MAX_THREADS > 1
76
+ @thread_pool.shutdown
77
+ @thread_pool.wait_for_termination
78
+ end
79
+ end
80
+
81
+ def munge_prompt(text, prompt)
82
+ final_prompt = "#{prompt}"
83
+
84
+ if text.length > @max_chunk_size
85
+ half = text.length / 2
86
+ final_prompt = "#{prompt}. Summarize it and keep it under #{half} characters"
87
+ end
88
+
89
+ return final_prompt
90
+ end
91
+
92
+ def summarize_split_text(split_text, prompt, split_as_markdown)
93
+
94
+ bar = TTY::ProgressBar.new("processing #{split_text.count} chunks [:bar]", total: split_text.count)
95
+
96
+ json_responses = []
97
+ split_text.each do |text|
98
+ do_concurently do
99
+ final_prompt = munge_prompt(text, prompt)
100
+ messages = [
101
+ { role: "user", content: final_prompt },
102
+ { role: "user", content: text }
103
+ ]
104
+ json_responses << post_open_ai(API_COMPLETIONS_PATH, {
105
+ messages: messages
106
+ }.to_json)
107
+ bar.advance
108
+ end
109
+ end
110
+
111
+ # wait
112
+ wait_concurrently
113
+ return json_responses
114
+ end
115
+
116
+ def post_open_ai(endpoint, data)
117
+ # url
118
+ api_url = ENV["OPENAPI_URL"]
119
+ final_url = URI.join(api_url, endpoint)
120
+
121
+ # data
122
+ sanitized_data = data.gsub("'", "")
123
+
124
+ # post
125
+ api_key = ENV["OPENAPI_KEY"]
126
+ cmd = """
127
+ curl -s -X POST \
128
+ '#{final_url}' \
129
+ -H 'Content-Type: application/json' \
130
+ -H 'Authorization: Bearer #{api_key}' \
131
+ -d '#{sanitized_data}'
132
+ """
133
+ response_text = `#{cmd}`
134
+ begin
135
+ timestamp = DateTime.now.strftime("%Y%m%d%H%M%S")
136
+ response_hash = JSON.parse(response_text)
137
+
138
+ # completions
139
+ raise if endpoint == API_COMPLETIONS_PATH and not response_hash.key?("choices")
140
+
141
+ # cache
142
+ save_to_cache(sanitized_data, timestamp, "request")
143
+ save_to_cache(response_text, timestamp, "response")
144
+
145
+ # response
146
+ return response_hash
147
+ rescue => e
148
+ #@logger.error e
149
+ @logger.error response_text
150
+ exit
151
+ end
152
+ end
153
+
154
+ def get_cache_path
155
+ cache_path = File.join(@cache_dir, ".open_ai_cache")
156
+ Dir.mkdir(cache_path) if !File.directory?(cache_path)
157
+ return cache_path
158
+ end
159
+
160
+ def save_to_cache(json_string, timestamp, postfix)
161
+ unless @cache_dir.nil?
162
+ current_datetime = DateTime.now.strftime("%Y%m%d%H%M%S")
163
+ filepath = File.join(get_cache_path, "#{current_datetime}_#{postfix}.json")
164
+ File.open(filepath, "w") do |file|
165
+ file.write(JSON.pretty_generate(JSON.parse(json_string)))
166
+ end
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,78 @@
1
+ require 'baran'
2
+
3
+ class AITextSplitter
4
+
5
+ def initialize(max_chunk_size, logger)
6
+ @max_chunk_size = max_chunk_size
7
+ @cut_off = (@max_chunk_size + (@max_chunk_size * 0.1)).floor
8
+ @logger = logger
9
+ end
10
+
11
+ def split(data, split_as_markdown=false, separator)
12
+ return markdown_to_array(data) if split_as_markdown
13
+ return basic_split_then_reduce(data, separator)
14
+ end
15
+
16
+ private
17
+
18
+ #
19
+ # Separator Variants
20
+ #
21
+
22
+ # basic splitter, would lose context when splits got too small
23
+ def basic_splitter(data, separator)
24
+ separator = "\n\n" if separator.nil? or separator.empty?
25
+ splitter = Baran::CharacterTextSplitter.new(chunk_size: @max_chunk_size, chunk_overlap: 64, separator: separator)
26
+ chunks = splitter.chunks(data).collect {|c| c[:text]}
27
+ return reduce_to_max_size(chunks)
28
+ end
29
+
30
+ # Preferred: provides even better context by insisting on splits near max_chunk_size
31
+ def basic_split_then_reduce(data, separator)
32
+ chunks = basic_splitter(data, separator)
33
+ return reduce_to_max_size(chunks)
34
+ end
35
+
36
+ # User can hint at split points; it didn't work great
37
+ def recursive_splitter(data, separator)
38
+ separator = ([separator] + ["\n\n"]).compact
39
+ splitter = Baran::RecursiveCharacterTextSplitter.new(
40
+ chunk_size: @max_chunk_size, chunk_overlap: 64,
41
+ separators: separator
42
+ )
43
+ chunks = splitter.chunks(data).collect {|c| c[:text]}
44
+ return reduce_to_max_size(chunks)
45
+ end
46
+
47
+ #
48
+ # Markdown
49
+ #
50
+
51
+ def markdown_to_array(data)
52
+ splitter = Baran::MarkdownSplitter.new()
53
+ return splitter.chunks(data).collect {|c| c[:text]}
54
+ end
55
+
56
+ #
57
+ # Splitting is done by separator and the LLM can respond
58
+ # with content of any length. Let's reduce the chunks by
59
+ # combining smaller responses up to @max_chunk_size
60
+ #
61
+
62
+ def reduce_to_max_size(chunks)
63
+ combined = []
64
+ i = 0
65
+ while i < chunks.length
66
+ c = chunks[i]
67
+ n = chunks[i + 1]
68
+ unless n.nil? or (c.length + n.length) > @cut_off
69
+ combined << [c, n].join("\n")
70
+ i += 2
71
+ else
72
+ combined << c
73
+ i += 1
74
+ end
75
+ end
76
+ combined
77
+ end
78
+ end
@@ -17,7 +17,7 @@ def add_help(opts)
17
17
  end
18
18
 
19
19
  def ask_and_store_option(options, key, question)
20
- if options[key].nil?
20
+ if options[key].nil? or options[key].empty?
21
21
  options[key] = TTY::Prompt.new.ask(question)
22
22
  end
23
23
  end
@@ -0,0 +1,41 @@
1
+ class Elastic
2
+ def initialize(elastic_url)
3
+ @elastic_url = elastic_url
4
+ end
5
+
6
+ def query_by_id(ids)
7
+ url = "/_search"
8
+ verb = "POST"
9
+ data = {
10
+ query: {
11
+ terms: {
12
+ _id: ids
13
+ }
14
+ }
15
+ }
16
+ sanitized_data = data.to_json
17
+ return post(url, verb, data)
18
+ end
19
+
20
+ def post(url, verb, data)
21
+ final_url = "#{@elastic_url}/#{url}"
22
+
23
+ # sanitize
24
+ sanitized_data = data.to_json
25
+ ["'", "’"].each { |c| sanitized_data.gsub!(c, "\#{c}") }
26
+
27
+ # post
28
+ cmd = """
29
+ curl -s -X#{verb} \
30
+ #{final_url} \
31
+ -H 'Content-Type: application/json' \
32
+ -d '#{sanitized_data}'
33
+ """
34
+ return `#{cmd}`
35
+ end
36
+
37
+ def create_index(elastic_index, body)
38
+ url = "#{elastic_index}"
39
+ return post(url, "PUT", body)
40
+ end
41
+ end