cl-magic 0.4.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +16 -2
- data/cl-magic.gemspec +5 -0
- data/lib/cl/magic/cl +21 -14
- data/lib/cl/magic/cl-ai-chat +117 -0
- data/lib/cl/magic/cl-ai-query +116 -0
- data/lib/cl/magic/cl-ai-store-jira +158 -0
- data/lib/cl/magic/cl-dk +23 -456
- data/lib/cl/magic/cl-dk-make +174 -0
- data/lib/cl/magic/cl-dk-make-world +163 -0
- data/lib/cl/magic/cl-dk-parts +253 -0
- data/lib/cl/magic/cl-dk-world +140 -0
- data/lib/cl/magic/cl-jira-fetch +15 -47
- data/lib/cl/magic/cl-jira-fetch-by-epics +112 -0
- data/lib/cl/magic/cl-jira-to-elastic +126 -0
- data/lib/cl/magic/cl-jira-to-markdown +68 -0
- data/lib/cl/magic/{cl-jira-stats → cl-jira-to-stats} +13 -10
- data/lib/cl/magic/cl-kube-logs +3 -1
- data/lib/cl/magic/common/ai_prompt.rb +169 -0
- data/lib/cl/magic/common/ai_text_splitter.rb +78 -0
- data/lib/cl/magic/common/common_options.rb +1 -1
- data/lib/cl/magic/common/elastic.rb +41 -0
- data/lib/cl/magic/common/jira.rb +169 -42
- data/lib/cl/magic/common/milvus.rb +78 -0
- data/lib/cl/magic/dk/help_printer.rb +29 -0
- data/lib/cl/magic/dk/parts_merger.rb +67 -0
- data/lib/cl/magic/dk/world_settings.rb +52 -0
- data/lib/cl/magic/dk/yaml_arg_munger.rb +107 -0
- data/lib/cl/magic/version.rb +1 -1
- metadata +77 -3
@@ -0,0 +1,112 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Fetch jira issues, status changelogs and save them to a file
#
# Pulls epics matching a wildcard from a Jira project, fetches the issues
# under those epics plus their status changelogs and comments, and prints
# the combined result as JSON to stdout (callers redirect to a datafile).
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'

require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'net/http'
require 'json'

@logger = get_logger()

#
# Features
#

# Fetch epics -> issues -> changelogs -> comments, then dump JSON to stdout.
# options: hash with :base_uri, :username, :token, :project, :epic_wildcard.
def do_work(options)
  break_at_one_page = false # when developing, set this to true
  jira = Jira.new options[:base_uri], options[:username], options[:token], break_at_one_page

  @logger.puts ""
  @logger.wait "fetch epics"
  # epics (the full records) is unused below; only the ids feed the issue query
  epic_ids, epics = jira.get_epic_ids(options[:project], options[:epic_wildcard])

  @logger.puts ""
  @logger.wait "fetch issues"
  issues = jira.get_issues_by_epic_ids(options[:project], epic_ids)

  @logger.puts ""
  @logger.wait "fetch change logs"
  # NOTE(review): jira is passed into its own instance method — presumably the
  # helper's signature wants an explicit client handle; confirm in common/jira.rb
  issues = jira.collect_status_changelogs(jira, issues)

  @logger.puts ""
  @logger.wait "fetch comments"
  issues = jira.collect_comments(jira, issues)

  @logger.puts ""
  # stdout is the "file": redirect this JSON to create the datafile
  puts issues.to_json
end

#
# Options
#

options = {}
global_banner = <<DOC

Fetch jira issues, status changelogs and save them to a file

Usage: cl jira fetch-by-epics [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("--base-uri URI", "base uri for jira (ex. https://company.atlassian.net)") do |v|
    options[:base_uri] = v
  end

  g.on("-u", "--username USERNAME", "jira username") do |v|
    options[:username] = v
  end

  g.on("-t", "--token TOKEN", "jira token (you can create one, google it)") do |v|
    options[:token] = v
  end

  g.on("-p", "--project KEY", "jira project to fetch from") do |v|
    options[:project] = v
  end

  g.on("-w", "--epic-wildcard TEXT", "wildcard to filter the epics by") do |v|
    options[:epic_wildcard] = v
  end

end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# error on token right away
# (the token is the one option we refuse to prompt for interactively)
if options[:token].nil?
  @logger.error "missing --token"
  exit
end

# prompt for missing options
ask_and_store_option(options, :base_uri, "base_uri: ")
ask_and_store_option(options, :username, "username: ")
ask_and_store_option(options, :project, "project: ")
ask_and_store_option(options, :epic_wildcard, "epic_wildcard: ")

# display full command
# (token is redacted so the history file never stores the secret)
write_history("""cl jira fetch-by-epics \\
  --base-uri=#{options[:base_uri]} \\
  --username=#{options[:username]} \\
  --project=#{options[:project]} \\
  --epic-wildcard=#{options[:epic_wildcard]} \\
  --token REDACTED
""")

do_work(options)
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Jira fetch datafile to markdown
#
# NOTE(review): this file ships as cl-jira-to-elastic, but its banner, history
# line, and active code path all render the datafile as markdown to stdout.
# The elastic helpers below are dead/commented scaffolding — confirm intent.
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'
require 'active_support/all'

require 'cl/magic/common/parse_and_pick.rb'
require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'json'

@logger = get_logger()


# Shell out to curl to send +data+ (serialized to JSON) to +url+.
# Returns curl's raw stdout as a String. Currently only used by create_index.
def post(url, verb, data)
  cmd = """
  curl -X#{verb} \
  #{url} \
  -H 'Content-Type: application/json' \
  -d '#{data.to_json}'
  """
  return `#{cmd}`
end

# Create a "jira" index with a single text field.
# NOTE(review): @ELASTIC_URL is never assigned anywhere in this file, so the
# URL would start with "/jira" — presumably set elsewhere or still WIP; this
# function is only referenced from the commented-out block below.
def create_index()
  url = "#{@ELASTIC_URL}/jira"

  return post(url, "PUT", {
    "mappings": {
      "properties": {
        "text": {
          "type": "text"
        }
      }
    }
  })
end

# def do_work(options, data)
#   #puts create_index()

#   url = "#{@ELASTIC_URL}/jira/_doc/1"
#   puts post(url, "POST", {
#     "text": "This is a new issue created in Jira"
#   })
# end


#
# Features
#

# Read the jira datafile (JSON array of issues) and print each issue as
# markdown: header fields, summary, then comments.
# options: hash with :data_filepath (relative to @working_dir).
def do_work(options)
  filepath = File.join(@working_dir, options[:data_filepath])
  issues = JSON.parse(File.read(filepath))
  issues.each do |issue|

    md = []
    md << "# #{issue['key']}"
    md << "project: #{issue['fields']['project']['key']}"
    md << "created: #{issue['fields']['created']}"
    md << "updated: #{issue['fields']['updated']}"
    # optional fields are guarded — absent keys would raise on ['name'] etc.
    md << "status: #{issue['fields']['status']['statusCategory']['name']}" unless issue['fields']["status"].nil?
    md << "priority: #{issue['fields']['priority']['name']}"
    md << "labels: #{issue['fields']['labels'].join(',')}"
    md << "issue_type: #{issue['fields']['issuetype']['name']}" unless issue['fields']["issuetype"].nil?
    md << "assignee: #{issue['fields']['assignee']['displayName']}" unless issue['fields']["assignee"].nil?
    md << ""
    md << "## Summary:"
    md << "#{issue['fields']['summary']}"
    # push to elastic

    md << "## Comments"
    md << ""
    issue["comments"].each_with_index do |comment, i|
      md << "### Comment by #{comment["author"]["displayName"]} "
      md << ""
      md << "created: #{comment["created"]}"
      # {noformat} markers from jira become fenced code blocks in markdown
      md << "#{comment["body"].gsub('{noformat}', "\n```\n")}"
      md << ""
    end
    puts md
  end
end

#
# Options
#

options = {}
global_banner = <<DOC

Jira fetch datafile to markdown

Usage: cl jira to-markdown [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
    options[:data_filepath] = v
  end
end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# prompt for the datafile path if it wasn't supplied as a flag
ask_and_store_option(options, :data_filepath, "data_filepath: ")

# display full command
write_history("""cl jira to-markdown \\
  --data-filepath=#{options[:data_filepath]}
""")

do_work(options)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Jira fetch datafile to markdown
#
# Reads a previously fetched jira datafile (JSON array of issues) and prints
# each issue — plus its comments — as markdown to stdout.
require 'optparse'
require 'optparse/subcommand'
require 'tty-command'
require 'tty-prompt'
require 'active_support/all'

require 'cl/magic/common/parse_and_pick.rb'
require 'cl/magic/common/common_options.rb'
require 'cl/magic/common/logging.rb'
require 'cl/magic/common/jira.rb'

require 'json'

@logger = get_logger()

#
# Features
#

# Render every issue in the datafile as markdown.
# options: hash with :data_filepath (relative to @working_dir).
def do_work(options)
  filepath = File.join(@working_dir, options[:data_filepath])
  issues = JSON.parse(File.read(filepath))
  issues.each do |i|
    # jira_to_markdown returns [issue_markdown, comments]; each comment entry's
    # element [1] is its markdown text (NOTE(review): shape inferred from usage
    # here — confirm against common/jira.rb)
    issue_md, comments = Jira.jira_to_markdown(i)
    puts issue_md
    puts comments.map{ |o| o[1] }.join("\n")
  end
end

#
# Options
#

options = {}
global_banner = <<DOC

Jira fetch datafile to markdown

Usage: cl jira to-markdown [options]

DOC

global = OptionParser.new do |g|
  g.banner = global_banner
  add_help_and_verbose(g)

  g.on("-f", "--data-filepath FILEPATH", "relative path jira datafile") do |v|
    options[:data_filepath] = v
  end
end

#
# Run
#

@working_dir = ENV['CL_WORKING_DIR'] # passed through cl-magic to here
global.parse(ARGV)

# prompt for the datafile path if it wasn't supplied as a flag
ask_and_store_option(options, :data_filepath, "data_filepath: ")

# display full command
write_history("""cl jira to-markdown \\
  --data-filepath=#{options[:data_filepath]}
""")

do_work(options)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
#
|
2
|
+
# Jira fetch datafile to stats
|
3
3
|
require 'optparse'
|
4
4
|
require 'optparse/subcommand'
|
5
5
|
require 'tty-command'
|
@@ -14,25 +14,26 @@ require 'net/http'
|
|
14
14
|
require 'json'
|
15
15
|
|
16
16
|
@logger = get_logger()
|
17
|
-
@cl_cmd_name = File.basename(__FILE__).split('-').join(' ')
|
18
17
|
|
19
18
|
#
|
20
19
|
# Features
|
21
20
|
#
|
22
21
|
|
23
22
|
def get_issues_from_datafile(options)
|
24
|
-
|
23
|
+
final_issues = []
|
25
24
|
filepath = File.join(@working_dir, options[:data_filepath])
|
26
|
-
File.
|
27
|
-
|
25
|
+
issues = JSON.parse(File.read(filepath))
|
26
|
+
|
27
|
+
@logger.info "stats for: #{options[:data_filepath]}"
|
28
|
+
issues.each do |issue|
|
28
29
|
issuetype = issue["fields"]["issuetype"]["name"]
|
29
30
|
labels = issue["fields"]["labels"]
|
30
31
|
|
31
32
|
has_excluded_labels = (labels & options[:exclude_labels]).any?
|
32
33
|
is_excluded_issuetype = options[:exclude_issuetypes].include?(issuetype.downcase)
|
33
|
-
|
34
|
+
final_issues << issue unless has_excluded_labels or is_excluded_issuetype
|
34
35
|
end
|
35
|
-
return
|
36
|
+
return final_issues
|
36
37
|
end
|
37
38
|
|
38
39
|
def in_range_issue_stats(issues, start_date, end_date, options)
|
@@ -137,6 +138,8 @@ end
|
|
137
138
|
def do_work(options)
|
138
139
|
issues = get_issues_from_datafile(options)
|
139
140
|
oldest_date = oldest_issue_date(issues).beginning_of_week
|
141
|
+
@logger.info "starting at #{oldest_date}"
|
142
|
+
|
140
143
|
iter_date_range(oldest_date) do |start_date, end_date|
|
141
144
|
stat_hashes = in_range_issue_stats(issues, start_date, end_date, options)
|
142
145
|
counts = print_stats(stat_hashes, start_date, end_date)
|
@@ -153,9 +156,9 @@ options = {
|
|
153
156
|
}
|
154
157
|
global_banner = <<DOC
|
155
158
|
|
156
|
-
|
159
|
+
Jira fetch datafile to stats
|
157
160
|
|
158
|
-
Usage:
|
161
|
+
Usage: cl jira to-stats [options]
|
159
162
|
|
160
163
|
DOC
|
161
164
|
|
@@ -190,7 +193,7 @@ options[:exclude_issuetypes] = [] if options[:exclude_issuetypes].nil?
|
|
190
193
|
options[:exclude_labels] = [] if options[:exclude_labels].nil?
|
191
194
|
|
192
195
|
# display full command
|
193
|
-
write_history("""
|
196
|
+
write_history("""cl jira to-stats \\
|
194
197
|
--data-filepath=#{options[:data_filepath]} \\
|
195
198
|
--exclude-issuetypes=#{options[:exclude_issuetypes].join(',')} \\
|
196
199
|
--exclude-labels=#{options[:exclude_labels].join(',')}
|
data/lib/cl/magic/cl-kube-logs
CHANGED
@@ -17,8 +17,10 @@ require 'cl/magic/common/kubectl.rb'
|
|
17
17
|
# Features
|
18
18
|
#
|
19
19
|
|
20
|
+
|
20
21
|
def do_work(options, pods, containers)
|
21
|
-
|
22
|
+
container_name_regex = "^(#{containers.collect(&:first).join('|')})$"
|
23
|
+
cmd = "kubectl stern '#{pods.collect(&:first).join('|')}' --context #{options[:kube_context]} --namespace #{options[:namespace]} --container '#{container_name_regex}' --since #{options[:since]} --container-state 'running,waiting,terminated'"
|
22
24
|
cmd += " | grep #{options[:grep]}" if options[:grep]
|
23
25
|
|
24
26
|
@logger.puts
|
@@ -0,0 +1,169 @@
|
|
1
|
+
|
2
|
+
require 'json'
|
3
|
+
require 'uri'
|
4
|
+
require 'pp'
|
5
|
+
require 'digest'
|
6
|
+
require 'date'
|
7
|
+
|
8
|
+
require 'tty-progressbar'
|
9
|
+
require 'concurrent'
|
10
|
+
|
11
|
+
require 'cl/magic/common/ai_text_splitter.rb'
|
12
|
+
|
13
|
+
|
14
|
+
class AIPrompt
  # Relative API paths on the OpenAI-compatible host named by ENV["OPENAPI_URL"].
  API_COMPLETIONS_PATH = "/openai/v1/chat/completions"
  API_EMBEDDINGS_PATH = "/openai/v1/embeddings"
  MAX_THREADS = 10 # set to 1 to debug without concurrency

  # logger: object responding to #error
  # cache_dir: directory for request/response JSON cache (nil disables caching)
  # max_chunk_size: max characters per chunk sent to the model
  # temperature: stored but not currently sent with any request (NOTE(review))
  def initialize(logger, cache_dir, max_chunk_size=10000, temperature=1)
    @cache_dir = cache_dir
    @logger = logger
    @max_chunk_size = max_chunk_size
    @temperature = temperature
    @ai_text_splitter = AITextSplitter.new(@max_chunk_size, @logger)
    @thread_pool = Concurrent::ThreadPoolExecutor.new(
      min_threads: 0,
      max_threads: MAX_THREADS,
      max_queue: 0,
      fallback_policy: :caller_runs # run on the caller when the pool is saturated
    )
  end

  # Returns the embedding vector (response["data"][0]["embedding"]) for +input+.
  def gen_embeddings(input)
    data = {
      model: "text-embedding-ada-002",
      input: input,
    }
    response = post_open_ai(API_EMBEDDINGS_PATH, data.to_json)
    return response["data"][0]["embedding"]
  end

  # Split +raw_data+ into chunks, prompt the model once per chunk (concurrently),
  # and return an Array with one response string per chunk.
  def prompt(raw_data, prompt, split_as_markdown=false, separator)

    # split
    split_data = @ai_text_splitter.split(raw_data, split_as_markdown, separator)

    # summarize
    responses = summarize_split_text(split_data, prompt, split_as_markdown)

    # map and return
    return responses.collect do |json|
      json["choices"].map {|c| c["message"]["content"]}.join("\n")
    end
  end

  # Delete all cached request/response files.
  def clear_cache()
    Dir.glob(File.join(get_cache_path, '*.json')).each do |file|
      File.delete(file)
    end
  end

  private

  # Run the block on the pool when concurrency is enabled, inline otherwise.
  # (Fix: renamed from the misspelled do_concurently.)
  def do_concurrently
    if MAX_THREADS > 1
      @thread_pool.post do
        yield
      end
    else
      yield
    end
  end

  # Block until all queued work finishes (no-op when running inline).
  def wait_concurrently
    if MAX_THREADS > 1
      @thread_pool.shutdown
      @thread_pool.wait_for_termination
    end
  end

  # For oversized chunks, ask the model to keep its answer small enough
  # (half the input length) that responses can be re-chunked downstream.
  def munge_prompt(text, prompt)
    final_prompt = "#{prompt}"

    if text.length > @max_chunk_size
      half = text.length / 2
      final_prompt = "#{prompt}. Summarize it and keep it under #{half} characters"
    end

    return final_prompt
  end

  # POST each chunk to the completions endpoint; returns raw JSON response hashes.
  def summarize_split_text(split_text, prompt, split_as_markdown)

    bar = TTY::ProgressBar.new("processing #{split_text.count} chunks [:bar]", total: split_text.count)

    json_responses = []
    split_text.each do |text|
      do_concurrently do
        final_prompt = munge_prompt(text, prompt)
        messages = [
          { role: "user", content: final_prompt },
          { role: "user", content: text }
        ]
        # NOTE(review): Array#<< from pool threads is benign under MRI's GVL,
        # but a Queue/Mutex would be safer if this ever runs truly parallel.
        json_responses << post_open_ai(API_COMPLETIONS_PATH, {
          messages: messages
        }.to_json)
        bar.advance
      end
    end

    # wait
    wait_concurrently
    return json_responses
  end

  # Shell out to curl to POST +data+ (a JSON string) to +endpoint+.
  # Logs the raw response and exits the process when the response is malformed
  # (unparseable, or a completions reply without "choices").
  def post_open_ai(endpoint, data)
    # url
    api_url = ENV["OPENAPI_URL"]
    final_url = URI.join(api_url, endpoint)

    # data
    # single quotes are stripped so the payload survives the single-quoted -d below
    sanitized_data = data.gsub("'", "")

    # post
    api_key = ENV["OPENAPI_KEY"]
    cmd = """
    curl -s -X POST \
    '#{final_url}' \
    -H 'Content-Type: application/json' \
    -H 'Authorization: Bearer #{api_key}' \
    -d '#{sanitized_data}'
    """
    response_text = `#{cmd}`
    begin
      timestamp = DateTime.now.strftime("%Y%m%d%H%M%S")
      response_hash = JSON.parse(response_text)

      # completions
      raise if endpoint == API_COMPLETIONS_PATH and not response_hash.key?("choices")

      # cache (shared timestamp pairs the request file with its response file)
      save_to_cache(sanitized_data, timestamp, "request")
      save_to_cache(response_text, timestamp, "response")

      # response
      return response_hash
    rescue => e
      #@logger.error e
      @logger.error response_text
      exit
    end
  end

  # Cache dir lives under @cache_dir/.open_ai_cache (created on demand).
  def get_cache_path
    cache_path = File.join(@cache_dir, ".open_ai_cache")
    Dir.mkdir(cache_path) if !File.directory?(cache_path)
    return cache_path
  end

  # Pretty-print +json_string+ to "<timestamp>_<postfix>.json" in the cache dir.
  # No-op when caching is disabled (@cache_dir is nil).
  # Fix: use the caller-supplied timestamp — previously the time was recomputed
  # here, so a request and its response could land under different filenames.
  def save_to_cache(json_string, timestamp, postfix)
    unless @cache_dir.nil?
      filepath = File.join(get_cache_path, "#{timestamp}_#{postfix}.json")
      File.open(filepath, "w") do |file|
        file.write(JSON.pretty_generate(JSON.parse(json_string)))
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'baran'
|
2
|
+
|
3
|
+
class AITextSplitter

  # max_chunk_size: target size per chunk; @cut_off grants 10% slack before
  # a merge is refused.
  def initialize(max_chunk_size, logger)
    @max_chunk_size = max_chunk_size
    @cut_off = (@max_chunk_size + (@max_chunk_size * 0.1)).floor
    @logger = logger
  end

  # Split +data+ into an Array of chunk strings: markdown-aware when
  # +split_as_markdown+ is set, otherwise by +separator+ (blank-line default)
  # followed by pairwise re-merging toward max_chunk_size.
  def split(data, split_as_markdown=false, separator)
    split_as_markdown ? markdown_to_array(data) : basic_split_then_reduce(data, separator)
  end

  private

  #
  # Separator variants
  #

  # Plain character splitter; small trailing chunks lose context, hence the
  # merge pass on the result.
  def basic_splitter(data, separator)
    sep = (separator.nil? || separator.empty?) ? "\n\n" : separator
    splitter = Baran::CharacterTextSplitter.new(chunk_size: @max_chunk_size, chunk_overlap: 64, separator: sep)
    pieces = splitter.chunks(data).map { |chunk| chunk[:text] }
    reduce_to_max_size(pieces)
  end

  # Preferred strategy: split, then merge neighbours back up toward
  # max_chunk_size so each chunk carries as much context as possible.
  def basic_split_then_reduce(data, separator)
    reduce_to_max_size(basic_splitter(data, separator))
  end

  # Variant that lets the caller hint at split points; kept for reference —
  # it did not work well in practice.
  def recursive_splitter(data, separator)
    separators = ([separator] + ["\n\n"]).compact
    splitter = Baran::RecursiveCharacterTextSplitter.new(
      chunk_size: @max_chunk_size, chunk_overlap: 64,
      separators: separators
    )
    reduce_to_max_size(splitter.chunks(data).map { |chunk| chunk[:text] })
  end

  #
  # Markdown
  #

  # Markdown-aware split; chunk boundaries follow the document structure.
  def markdown_to_array(data)
    Baran::MarkdownSplitter.new.chunks(data).map { |chunk| chunk[:text] }
  end

  # Splitting is separator-driven, so chunks can come out much smaller than
  # the budget. Walk the list and join adjacent pairs whose combined length
  # stays within @cut_off, yielding fewer, fuller chunks.
  def reduce_to_max_size(chunks)
    merged = []
    idx = 0
    while idx < chunks.length
      current = chunks[idx]
      neighbour = chunks[idx + 1]
      if neighbour && (current.length + neighbour.length) <= @cut_off
        merged << "#{current}\n#{neighbour}"
        idx += 2
      else
        merged << current
        idx += 1
      end
    end
    merged
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'json'
require 'shellwords'

# Thin elasticsearch client that shells out to curl.
class Elastic
  # elastic_url: base URL of the cluster (e.g. http://localhost:9200)
  def initialize(elastic_url)
    @elastic_url = elastic_url
  end

  # Fetch documents whose _id is in +ids+ (Array of id strings).
  # Returns curl's raw response body (a JSON string), not a parsed hash.
  def query_by_id(ids)
    url = "/_search"
    verb = "POST"
    data = {
      query: {
        terms: {
          _id: ids
        }
      }
    }
    return post(url, verb, data)
  end

  # Send +data+ (a Hash, serialized to JSON) to "#{@elastic_url}/#{url}" with
  # the given HTTP +verb+. Returns curl's raw response body as a String.
  def post(url, verb, data)
    # NOTE(review): callers pass url both with and without a leading slash,
    # producing an occasional double slash; elasticsearch tolerates it.
    final_url = "#{@elastic_url}/#{url}"

    # Shell-escape the JSON payload so quotes/apostrophes inside document text
    # cannot break out of the command line. (Fix: the previous sanitizer,
    # gsub!(c, "\#{c}"), replaced each quote with the literal text '#{c}' —
    # a broken escape that corrupted the payload and left the command unsafe.)
    payload = Shellwords.escape(data.to_json)

    cmd = "curl -s -X#{verb} " \
          "#{final_url} " \
          "-H 'Content-Type: application/json' " \
          "-d #{payload}"
    return `#{cmd}`
  end

  # Create an index named +elastic_index+ with the given +body+ (mappings etc).
  def create_index(elastic_index, body)
    return post(elastic_index, "PUT", body)
  end
end
|