cl-magic 0.3.9 → 1.2.0

@@ -0,0 +1,169 @@
+
+require 'json'
+require 'uri'
+require 'pp'
+require 'digest'
+require 'date'
+
+require 'tty-progressbar'
+require 'concurrent'
+
+require 'cl/magic/common/ai_text_splitter.rb'
+
+
+class AIPrompt
+  API_COMPLETIONS_PATH = "/openai/v1/chat/completions"
+  API_EMBEDDINGS_PATH = "/openai/v1/embeddings"
+  MAX_THREADS = 10 # set to 1 to debug without concurrency
+
+  def initialize(logger, cache_dir, max_chunk_size=10000, temperature=1)
+    @cache_dir = cache_dir
+    @logger = logger
+    @max_chunk_size = max_chunk_size
+    @temperature = temperature
+    @ai_text_splitter = AITextSplitter.new(@max_chunk_size, @logger)
+    @thread_pool = Concurrent::ThreadPoolExecutor.new(
+      min_threads: 0,
+      max_threads: MAX_THREADS,
+      max_queue: 0,
+      fallback_policy: :caller_runs
+    )
+  end
+
+  def gen_embeddings(input)
+    data = {
+      model: "text-embedding-ada-002",
+      input: input,
+    }
+    response = post_open_ai(API_EMBEDDINGS_PATH, data.to_json)
+    return response["data"][0]["embedding"]
+  end
+
+  def prompt(raw_data, prompt, split_as_markdown=false, separator)
+
+    # split
+    split_data = @ai_text_splitter.split(raw_data, split_as_markdown, separator)
+
+    # summarize
+    responses = summarize_split_text(split_data, prompt, split_as_markdown)
+
+    # map and return
+    return responses.collect do |json|
+      json["choices"].map {|c| c["message"]["content"]}.join("\n")
+    end
+  end
+
+  def clear_cache()
+    Dir.glob(File.join(get_cache_path, '*.json')).each do |file|
+      File.delete(file)
+    end
+  end
+
+  private
+
+  def do_concurently
+    if MAX_THREADS > 1
+      @thread_pool.post do
+        yield
+      end
+    else
+      yield
+    end
+  end
+
+  def wait_concurrently
+    if MAX_THREADS > 1
+      @thread_pool.shutdown
+      @thread_pool.wait_for_termination
+    end
+  end
+
+  def munge_prompt(text, prompt)
+    final_prompt = "#{prompt}"
+
+    if text.length > @max_chunk_size
+      half = text.length / 2
+      final_prompt = "#{prompt}. Summarize it and keep it under #{half} characters"
+    end
+
+    return final_prompt
+  end
+
+  def summarize_split_text(split_text, prompt, split_as_markdown)
+
+    bar = TTY::ProgressBar.new("processing #{split_text.count} chunks [:bar]", total: split_text.count)
+
+    json_responses = []
+    split_text.each do |text|
+      do_concurently do
+        final_prompt = munge_prompt(text, prompt)
+        messages = [
+          { role: "user", content: final_prompt },
+          { role: "user", content: text }
+        ]
+        json_responses << post_open_ai(API_COMPLETIONS_PATH, {
+          messages: messages
+        }.to_json)
+        bar.advance
+      end
+    end
+
+    # wait
+    wait_concurrently
+    return json_responses
+  end
+
+  def post_open_ai(endpoint, data)
+    # url
+    api_url = ENV["OPENAPI_URL"]
+    final_url = URI.join(api_url, endpoint)
+
+    # data
+    sanitized_data = data.gsub("'", "")
+
+    # post
+    api_key = ENV["OPENAPI_KEY"]
+    cmd = """
+      curl -s -X POST \
+        '#{final_url}' \
+        -H 'Content-Type: application/json' \
+        -H 'Authorization: Bearer #{api_key}' \
+        -d '#{sanitized_data}'
+    """
+    response_text = `#{cmd}`
+    begin
+      timestamp = DateTime.now.strftime("%Y%m%d%H%M%S")
+      response_hash = JSON.parse(response_text)
+
+      # completions
+      raise if endpoint == API_COMPLETIONS_PATH and not response_hash.key?("choices")
+
+      # cache
+      save_to_cache(sanitized_data, timestamp, "request")
+      save_to_cache(response_text, timestamp, "response")
+
+      # response
+      return response_hash
+    rescue => e
+      #@logger.error e
+      @logger.error response_text
+      exit
+    end
+  end
+
+  def get_cache_path
+    cache_path = File.join(@cache_dir, ".open_ai_cache")
+    Dir.mkdir(cache_path) if !File.directory?(cache_path)
+    return cache_path
+  end
+
+  def save_to_cache(json_string, timestamp, postfix)
+    unless @cache_dir.nil?
+      current_datetime = DateTime.now.strftime("%Y%m%d%H%M%S")
+      filepath = File.join(get_cache_path, "#{current_datetime}_#{postfix}.json")
+      File.open(filepath, "w") do |file|
+        file.write(JSON.pretty_generate(JSON.parse(json_string)))
+      end
+    end
+  end
+end
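For orientation, a minimal usage sketch of the new AIPrompt class follows. The require path is an assumption (only the text-splitter path appears in this diff), the class reads OPENAPI_URL and OPENAPI_KEY from the environment, and the endpoint, key, and cache directory shown here are placeholders.

```ruby
require 'logger'
require 'cl/magic/common/ai_prompt' # assumed path; not shown in this diff

ENV['OPENAPI_URL'] ||= 'https://api.openai.com' # placeholder endpoint
ENV['OPENAPI_KEY'] ||= 'sk-...'                 # placeholder key

logger = Logger.new($stderr)
ai = AIPrompt.new(logger, '/tmp/cl-magic-cache') # placeholder cache dir

# Long input is split, each chunk is summarized on the thread pool, and the
# per-chunk completions come back as an array of strings.
summaries = ai.prompt(File.read('CHANGELOG.md'), 'Summarize the following text', false, "\n\n")
puts summaries.join("\n\n")

# Embeddings are returned as a plain array of floats.
puts ai.gen_embeddings('hello world').length
```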
@@ -0,0 +1,78 @@
+require 'baran'
+
+class AITextSplitter
+
+  def initialize(max_chunk_size, logger)
+    @max_chunk_size = max_chunk_size
+    @cut_off = (@max_chunk_size + (@max_chunk_size * 0.1)).floor
+    @logger = logger
+  end
+
+  def split(data, split_as_markdown=false, separator)
+    return markdown_to_array(data) if split_as_markdown
+    return basic_split_then_reduce(data, separator)
+  end
+
+  private
+
+  #
+  # Separator variants
+  #
+
+  # basic splitter; would lose context when splits got too small
+  def basic_splitter(data, separator)
+    separator = "\n\n" if separator.nil? or separator.empty?
+    splitter = Baran::CharacterTextSplitter.new(chunk_size: @max_chunk_size, chunk_overlap: 64, separator: separator)
+    chunks = splitter.chunks(data).collect {|c| c[:text]}
+    return reduce_to_max_size(chunks)
+  end
+
+  # Preferred: provides even better context by insisting on splits near max_chunk_size
+  def basic_split_then_reduce(data, separator)
+    chunks = basic_splitter(data, separator)
+    return reduce_to_max_size(chunks)
+  end
+
+  # User can hint at split points; it didn't work great
+  def recursive_splitter(data, separator)
+    separator = ([separator] + ["\n\n"]).compact
+    splitter = Baran::RecursiveCharacterTextSplitter.new(
+      chunk_size: @max_chunk_size, chunk_overlap: 64,
+      separators: separator
+    )
+    chunks = splitter.chunks(data).collect {|c| c[:text]}
+    return reduce_to_max_size(chunks)
+  end
+
+  #
+  # Markdown
+  #
+
+  def markdown_to_array(data)
+    splitter = Baran::MarkdownSplitter.new()
+    return splitter.chunks(data).collect {|c| c[:text]}
+  end
+
+  #
+  # Splitting is done by separator and the LLM can respond
+  # with content of any length. Reduce the chunks by
+  # combining smaller responses up to @max_chunk_size.
+  #
+
+  def reduce_to_max_size(chunks)
+    combined = []
+    i = 0
+    while i < chunks.length
+      c = chunks[i]
+      n = chunks[i + 1]
+      unless n.nil? or (c.length + n.length) > @cut_off
+        combined << [c, n].join("\n")
+        i += 2
+      else
+        combined << c
+        i += 1
+      end
+    end
+    combined
+  end
+end
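A minimal sketch of the splitter on its own follows, assuming the baran gem is installed and using the require path referenced from the AIPrompt file above; the sample text and chunk size are illustrative only.

```ruby
require 'logger'
require 'cl/magic/common/ai_text_splitter.rb'

splitter = AITextSplitter.new(500, Logger.new($stderr))

# Separator-based split; adjacent small chunks are re-combined up to ~110% of max_chunk_size.
text = (["lorem ipsum " * 10] * 8).join("\n\n")
chunks = splitter.split(text, false, "\n\n")
chunks.each_with_index { |c, i| puts "chunk #{i}: #{c.length} chars" }

# Markdown mode delegates to Baran::MarkdownSplitter instead.
puts splitter.split("# Title\n\nbody\n\n## Section\n\nmore", true, nil).count
```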
@@ -17,7 +17,7 @@ def add_help(opts)
 end
 
 def ask_and_store_option(options, key, question)
-  if options[key].nil?
+  if options[key].nil? or options[key].empty?
     options[key] = TTY::Prompt.new.ask(question)
   end
 end
@@ -0,0 +1,41 @@
+class Elastic
+  def initialize(elastic_url)
+    @elastic_url = elastic_url
+  end
+
+  def query_by_id(ids)
+    url = "/_search"
+    verb = "POST"
+    data = {
+      query: {
+        terms: {
+          _id: ids
+        }
+      }
+    }
+    sanitized_data = data.to_json
+    return post(url, verb, data)
+  end
+
+  def post(url, verb, data)
+    final_url = "#{@elastic_url}/#{url}"
+
+    # sanitize
+    sanitized_data = data.to_json
+    ["'", "’"].each { |c| sanitized_data.gsub!(c, "\#{c}") }
+
+    # post
+    cmd = """
+      curl -s -X#{verb} \
+        #{final_url} \
+        -H 'Content-Type: application/json' \
+        -d '#{sanitized_data}'
+    """
+    return `#{cmd}`
+  end
+
+  def create_index(elastic_index, body)
+    url = "#{elastic_index}"
+    return post(url, "PUT", body)
+  end
+end
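A minimal sketch of the Elastic wrapper, assuming an Elasticsearch cluster at a placeholder URL and placeholder index and document ids; note that post serializes the body itself, so create_index takes a plain hash rather than a JSON string.

```ruby
require 'json'

es = Elastic.new('http://localhost:9200') # placeholder cluster URL

# Create an index, then fetch a handful of documents by id.
es.create_index('tickets', { settings: { number_of_shards: 1 } })

response = es.query_by_id(['PROJ-1', 'PROJ-2'])
hits = JSON.parse(response).dig('hits', 'hits') || []
hits.each { |h| puts h['_id'] }
```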
@@ -1,12 +1,70 @@
-require 'byebug'
+require 'tty-progressbar'
+require 'tty-spinner'
+require 'concurrent'
 
 class Jira
 
+  MAX_THREADS = 20 # set to 1 to debug without concurrency
+
   def initialize(base_uri, username, token, break_at_one_page=false)
     @base_uri = base_uri.chomp("/")
     @username = username
     @token = token
     @break_at_one_page = break_at_one_page
+
+    @thread_pool = Concurrent::ThreadPoolExecutor.new(
+      min_threads: 0,
+      max_threads: MAX_THREADS,
+      max_queue: 0,
+      fallback_policy: :caller_runs
+    )
+  end
+
+  #
+  # Formatter
+  #
+
+  def self.jira_to_markdown(issue)
+
+    md = []
+    md << ""
+    md << "# #{issue['key']}"
+    md << "project: #{issue['fields']['project']['key']}"
+    md << "created: #{issue['fields']['created']}"
+    md << "updated: #{issue['fields']['updated']}"
+    md << "status: #{issue['fields']['status']['statusCategory']['name']}" unless issue['fields']["status"].nil?
+    md << "priority: #{issue['fields']['priority']['name']}"
+    md << "labels: #{issue['fields']['labels'].join(',')}"
+    md << "issue_type: #{issue['fields']['issuetype']['name']}" unless issue['fields']["issuetype"].nil?
+    md << "assignee: #{issue['fields']['assignee']['displayName']}" unless issue['fields']["assignee"].nil?
+    md << ""
+    md << "## Summary"
+    md << "#{issue['fields']['summary']}"
+    md << ""
+    md << ""
+    issue_md = md.join("\n")
+
+    comments = []
+    issue["comments"].each_with_index do |comment, i|
+      c_md = []
+      c_md << "### Comment - #{comment["author"]["displayName"]} "
+      c_md << ""
+      c_md << "created: #{comment["created"]}"
+
+      # nest markdown deeper
+      comment["body"].split("\n").each do |line|
+        c_md << if line.start_with?("#")
+          "####{line}"
+        else
+          line
+        end
+      end
+
+      c_md << ""
+      comments << [comment["id"], c_md.join("\n")]
+    end
+
+    return issue_md, comments
   end
 
   #
@@ -16,10 +74,12 @@ class Jira
   def get_epic_ids(project, epic_wildcard)
     jql_query = "project = \"#{project}\" AND issuetype = Epic AND text ~ \"#{epic_wildcard}\""
     results = run_jql_query(jql_query)
-    return results.select{|h| h['fields']['summary'].start_with? epic_wildcard}.map {|h| h['id']}
+    epics = results.select{|h| h['fields']['summary'].start_with? epic_wildcard}
+    epic_ids = epics.map {|h| h['id']}
+    return epic_ids, epics
   end
 
-  def get_issues(project, epic_ids)
+  def get_issues_by_epic_ids(project, epic_ids)
     jql_query = "project = \"#{project}\" AND parentEpic IN (#{epic_ids.join(',')})"
     return run_jql_query(jql_query)
   end
@@ -32,6 +92,14 @@ class Jira
     end
   end
 
+  def get_issue_comments(issue_key)
+    uri = URI.parse("#{@base_uri}/rest/api/2/issue/#{issue_key}/comment")
+    jira_get(uri) do |response|
+      result = JSON.parse(response.body)
+      return result["comments"]
+    end
+  end
+
   #
   # Helpers: GET & POST
   #
@@ -49,7 +117,11 @@ class Jira
     if response.code == '200'
       yield response
     else
-      raise "Jira query failed with HTTP status code #{response.code}"
+      raise """
+        Jira query failed with HTTP status code #{response.code}
+
+        #{response.body}
+      """
     end
   end
 
@@ -68,7 +140,13 @@ class Jira
     if response.code == '200'
       yield response
     else
-      raise "Jira query failed with HTTP status code #{response.code}"
+      raise """
+        Jira query failed with HTTP status code #{response.code}
+
+        BODY: #{body.to_json}
+
+        RESPONSE: #{response.body}
+      """
     end
   end
 
@@ -77,6 +155,9 @@ class Jira
   #
 
   def run_jql_query(jql)
+    spinner = TTY::Spinner.new("[:spinner] fetching ...", format: :pulse_2)
+    spinner.auto_spin # Automatic animation with default interval
+
     start_at = 0
     max_results = 50
     total_results = nil
@@ -110,53 +191,100 @@ class Jira
           start_at += max_results # else next page
         end
       end
-
-      print '.' # loop
     end
+    spinner.stop("#{all_results.count} issues")
     all_results.map {|h| h}
   end
-end
 
-#
-# Collect status changelogs
-#
-# Given an array of jira issue hashes
-# * fetch the change log
-# * filter down to status changes
-# * add it to the issue hash as ["status_changelogs"]
-#
-
-def collect_status_changelogs(jira, issues, options)
-  final_issue_hashes = []
-
-  issues.each do |issue|
-    issue_key = issue["key"]
-    issue["status_changelogs"] = []
-
-    # fetch change log
-    print '.'
-    changelogs = jira.get_issue_status_changelog(issue_key)
-
-    changelogs.each do |change_log|
-
-      # all items that are status changes
-      status_logs = change_log["items"].select {|i| i["field"]=="status"}
-      status_logs = status_logs.collect do |status_log|
-        {
-          "key": issue_key,
-          "created": change_log["created"],
-          "toString": status_log["toString"],
-          "fromString": status_log["fromString"]
-        }
+  def collect_comments(jira, issues)
+    final_issue_hashes = []
+    bar = TTY::ProgressBar.new("fetching [:bar]", total: issues.count)
+
+    issues.each do |issue|
+      do_concurently do
+        issue_key = issue["key"]
+        issue["comments"] = []
+
+        # fetch comments
+        comments = get_issue_comments(issue_key)
+        issue["comments"] = comments
+        final_issue_hashes << issue # save
+        bar.advance
      end
+    end
+
+    # wait
+    wait_concurrently
+    return final_issue_hashes
+  end
+
+  #
+  # Collect status changelogs
+  #
+  # Given an array of jira issue hashes
+  # * fetch the change log
+  # * filter down to status changes
+  # * add it to the issue hash as ["status_changelogs"]
+  #
+
+  def collect_status_changelogs(jira, issues)
+    final_issue_hashes = []
+    bar = TTY::ProgressBar.new("fetching [:bar]", total: issues.count)
+
+    issues.each do |issue|
+      do_concurently do
+        issue_key = issue["key"]
+        issue["status_changelogs"] = []
+
+        # fetch change log
+        changelogs = get_issue_status_changelog(issue_key)
+
+        changelogs.each do |change_log|
+
+          # all items that are status changes
+          status_logs = change_log["items"].select {|i| i["field"]=="status"}
+          status_logs = status_logs.collect do |status_log|
+            {
+              "key": issue_key,
+              "created": change_log["created"],
+              "toString": status_log["toString"],
+              "fromString": status_log["fromString"]
+            }
+          end
 
-      # append them to issue
-      status_logs.each do |status_log|
-        issue["status_changelogs"] << status_log
-      end if status_logs.count > 0
+          # append them to issue
+          status_logs.each do |status_log|
+            issue["status_changelogs"] << status_log
+          end
+        end
+
+        final_issue_hashes << issue # save
+        bar.advance
+      end
     end
 
-    final_issue_hashes << issue # save
+    # wait
+    wait_concurrently
+    return final_issue_hashes
   end
-  return final_issue_hashes
+
+  private
+
+  def do_concurently
+    if MAX_THREADS > 1
+      @thread_pool.post do
+        yield
+      end
+    else
+      yield
+    end
+  end
+
+  def wait_concurrently
+    if MAX_THREADS > 1
+      @thread_pool.shutdown
+      @thread_pool.wait_for_termination
+    end
+  end
+
 end
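Taken together, the new Jira helpers support a fetch-then-format pipeline. A minimal sketch of how they might be wired up follows; the require path for the Jira class is not shown in this diff, the site, credentials, and project key are placeholders, and it assumes the existing get_issue_status_changelog helper (unchanged here) is still available.

```ruby
# Placeholders: site, user, token, and project key are illustrative only.
jira = Jira.new('https://example.atlassian.net', 'me@example.com', ENV['JIRA_TOKEN'])

# Epics whose summary starts with the wildcard, then their child issues.
epic_ids, _epics = jira.get_epic_ids('PROJ', '2024-Q1')
issues = jira.get_issues_by_epic_ids('PROJ', epic_ids)

# Comments and status changelogs are fetched concurrently on the thread pool.
issues = jira.collect_comments(jira, issues)
issues = jira.collect_status_changelogs(jira, issues)

issues.each do |issue|
  issue_md, comments = Jira.jira_to_markdown(issue)
  puts issue_md
  comments.each { |_id, comment_md| puts comment_md }
end
```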
@@ -0,0 +1,78 @@
+
+class Milvus
+  def initialize(host, port)
+    @host = host
+    @port = port
+  end
+
+  def search(collection_name, embedding)
+    final_url = "http://#{@host}:#{@port}/v1/vector/search"
+    data = {
+      collectionName: collection_name,
+      vector: embedding,
+      outputFields: ["id", "name", "doc_key", "distance"],
+    }
+
+    # post
+    sanitized_data = data.to_json
+    cmd = """
+      curl -s \
+        '#{final_url}' \
+        -X 'POST' \
+        -H 'accept: application/json' \
+        -H 'Content-Type: application/json' \
+        -d '#{sanitized_data}'
+    """
+    return `#{cmd}`
+  end
+
+  def create_collection(collection_name)
+    final_url = "http://#{@host}:#{@port}/v1/vector/collections/create"
+    data = {
+      dbName: "default",
+      collectionName: collection_name,
+      dimension: 1536,
+      metricType: "L2",
+      primaryField: "id",
+      vectorField: "vector"
+    }
+
+    # post
+    sanitized_data = data.to_json
+    cmd = """
+      curl -s \
+        '#{final_url}' \
+        -X 'POST' \
+        -H 'accept: application/json' \
+        -H 'Content-Type: application/json' \
+        -d '#{sanitized_data}'
+    """
+    return `#{cmd}`
+  end
+
+  def post_to_collection(collection_name, doc_key, embedding)
+    final_url = "http://#{@host}:#{@port}/v1/vector/insert"
+    data = {
+      collectionName: collection_name,
+      data: {
+        doc_key: doc_key,
+        vector: embedding
+      }
+    }
+
+    # post
+    sanitized_data = data.to_json
+    cmd = """
+      curl -s \
+        '#{final_url}' \
+        -X POST \
+        -H 'accept: application/json' \
+        -H 'Content-Type: application/json' \
+        -d '#{sanitized_data}'
+    """
+    response = `#{cmd}`
+    data = JSON.parse(response)
+    raise "Error: #{data.to_json}\n\nData #{sanitized_data}" if data.has_key?("message")
+    return data.to_json
+  end
+end
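A minimal sketch of the embedding round trip this Milvus wrapper enables, assuming a Milvus instance whose RESTful v1 API is reachable at the host and port given (placeholders here), and a collection name chosen by the caller; the random vector stands in for AIPrompt#gen_embeddings output.

```ruby
require 'json'

milvus = Milvus.new('localhost', 19530) # placeholder host/port

# The collection is created with dimension 1536, matching text-embedding-ada-002.
milvus.create_collection('docs')

embedding = Array.new(1536) { rand } # stand-in for AIPrompt#gen_embeddings output
milvus.post_to_collection('docs', 'PROJ-1', embedding)

puts milvus.search('docs', embedding)
```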
@@ -0,0 +1,29 @@
+
+class HelpPrinter
+
+  def initialize(logger)
+    @logger = logger
+  end
+
+  def print_dk_help_line(key, help)
+    if $stdout.isatty
+      if help.nil?
+        @logger.puts("#{key.ljust(15, ' ')} ???no help???")
+      else
+        key = key.ljust(15, ' ')
+        help_parts = help.split(";")
+
+        # first line
+        @logger.puts(key, help_parts.shift)
+
+        # following lines
+        padding = "".ljust(15, ' ')
+        help_parts.each do |p|
+          @logger.puts(padding, p)
+        end
+        @logger.puts("") if help.end_with?(";")
+      end
+    end
+  end
+
+end
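One note on HelpPrinter: it writes via @logger.puts, so it expects an IO-like object rather than a stdlib Logger. A minimal sketch with placeholder command names and help text:

```ruby
# Any object responding to #puts works; $stdout is the simplest choice.
printer = HelpPrinter.new($stdout)

printer.print_dk_help_line('summarize', 'Summarize Jira epics;supports --project and --epic;')
printer.print_dk_help_line('mystery', nil) # falls back to the ???no help??? marker
```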