summarize-meeting 0.1.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/summarize-meeting +7 -8
- data/lib/summarize-meeting/ai.rb +28 -0
- data/lib/summarize-meeting/meeting.rb +130 -0
- data/lib/summarize-meeting/version.rb +3 -0
- data/lib/summarize-meeting.rb +6 -0
- metadata +6 -4
- data/lib/ai.rb +0 -26
- data/lib/meeting.rb +0 -130
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd8ba0fbf2d8b0610cebeccbd7906283f9b39a357dc594026dd31c4853488b92
|
4
|
+
data.tar.gz: 6ab995e6472dc2b9cd749a99f8f5ecb634de40baf8645a66e2b2a761676b235e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90d848336e888922379d35d17b54fcd7312e5671005f1731ac93921e724c0b17c013c4230615e0842a771afc824af15280d18db1104ee404ffb9a8c8356545d7
|
7
|
+
data.tar.gz: ba360026742a884fb68cd7c2f5f079b1502da67da3ed9747f30585ff109ca59ddbb00869d18e796ad4ee38d439e4f063ced2a65609ec74f5d6f9b8844344ff28
|
data/bin/summarize-meeting
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require "optparse"
|
4
|
-
|
5
|
-
require_relative "../lib/meeting"
|
4
|
+
require "summarize-meeting"
|
6
5
|
|
7
6
|
def main
|
8
7
|
options = {}
|
@@ -19,8 +18,8 @@ def main
|
|
19
18
|
options[:output_file] = file
|
20
19
|
end
|
21
20
|
|
22
|
-
if ENV["
|
23
|
-
options[:openai_key] = ENV["
|
21
|
+
if ENV["OPENAI_API_KEY"]
|
22
|
+
options[:openai_key] = ENV["OPENAI_API_KEY"]
|
24
23
|
end
|
25
24
|
|
26
25
|
opts.on("-k", "--openai-key KEY", "The OpenAI API key to use") do |key|
|
@@ -36,8 +35,8 @@ def main
|
|
36
35
|
end
|
37
36
|
end.parse!
|
38
37
|
|
39
|
-
Ai.access_token = options[:openai_key] if options[:openai_key]
|
40
|
-
Ai.organization_id = options[:openai_org] if options[:openai_org]
|
38
|
+
SummarizeMeeting::Ai.access_token = options[:openai_key] if options[:openai_key]
|
39
|
+
SummarizeMeeting::Ai.organization_id = options[:openai_org] if options[:openai_org]
|
41
40
|
|
42
41
|
if ARGV.length != 1
|
43
42
|
puts "Error: You must specify a transcript file to summarize."
|
@@ -47,7 +46,7 @@ def main
|
|
47
46
|
transcript_file = ARGV[0]
|
48
47
|
transcript = File.read(transcript_file)
|
49
48
|
|
50
|
-
meeting = Meeting.new(transcript)
|
49
|
+
meeting = SummarizeMeeting::Meeting.new(transcript)
|
51
50
|
summary = meeting.summarize
|
52
51
|
summary_file_name = if options[:output_file]
|
53
52
|
options[:output_file]
|
@@ -59,6 +58,6 @@ def main
|
|
59
58
|
File.write(summary_file_name, summary)
|
60
59
|
end
|
61
60
|
|
62
|
-
if __FILE__
|
61
|
+
if __FILE__.to_s.end_with?("summarize-meeting") && $0.to_s.end_with?("summarize-meeting")
|
63
62
|
main
|
64
63
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "openai"
|
2
|
+
|
3
|
+
module SummarizeMeeting
|
4
|
+
module Ai
|
5
|
+
@@access_token = ENV["OPENAI_KEY"]
|
6
|
+
@@organization_id = ENV["OPENAI_ORG"]
|
7
|
+
|
8
|
+
def self.client
|
9
|
+
OpenAI::Client.new(access_token: access_token, organization_id: organization_id)
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.access_token
|
13
|
+
@@access_token
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.organization_id
|
17
|
+
@@organization_id
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.access_token=(token)
|
21
|
+
@@access_token = token
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.organization_id=(id)
|
25
|
+
@@organization_id = id
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require "json"
|
2
|
+
require "mustache"
|
3
|
+
require "openai"
|
4
|
+
|
5
|
+
module SummarizeMeeting
|
6
|
+
class Meeting
|
7
|
+
LINE_SUMMARY_PROMPT_TEMPLATE = [
|
8
|
+
{
|
9
|
+
role: "system",
|
10
|
+
content: "You are an assistant summarizing a meeting.",
|
11
|
+
},
|
12
|
+
{
|
13
|
+
role: "system",
|
14
|
+
content: "The transcript of the meeting is split into {{chunkCount}} chunks. This is the {{chunkIndex}} chunk.",
|
15
|
+
},
|
16
|
+
{
|
17
|
+
role: "assistant",
|
18
|
+
content: "Please provide me with the next chunk of the transcript.",
|
19
|
+
},
|
20
|
+
{
|
21
|
+
role: "user",
|
22
|
+
content: "{{chunk}}",
|
23
|
+
}
|
24
|
+
]
|
25
|
+
|
26
|
+
CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE = [
|
27
|
+
{
|
28
|
+
role: "system",
|
29
|
+
content: "You are an assistant summarizing a meeting.",
|
30
|
+
},
|
31
|
+
{
|
32
|
+
role: "system",
|
33
|
+
content: "Notes about the meeting have been compiled.",
|
34
|
+
},
|
35
|
+
{
|
36
|
+
role: "system",
|
37
|
+
content: <<~CONTENT
|
38
|
+
Your job is to write a thorough summary of the meeting.
|
39
|
+
The summary should start with a brief overview of the meeting.
|
40
|
+
The summary should be detailed and should extract any action items that were discussed.
|
41
|
+
The summary should be organized into sections with headings and bullet points.
|
42
|
+
The summary should include a list of attendees.
|
43
|
+
The order of the sections should be overview, attendees, action items, and detailed notes by topic.
|
44
|
+
CONTENT
|
45
|
+
},
|
46
|
+
{
|
47
|
+
role: "assistant",
|
48
|
+
content: "Please provide me with notes from the meeting.",
|
49
|
+
},
|
50
|
+
{
|
51
|
+
role: "user",
|
52
|
+
content: "{{notes}}",
|
53
|
+
}
|
54
|
+
]
|
55
|
+
|
56
|
+
def initialize(transcript)
|
57
|
+
@transcript = transcript
|
58
|
+
end
|
59
|
+
|
60
|
+
attr_reader :transcript
|
61
|
+
|
62
|
+
def summarize
|
63
|
+
|
64
|
+
# Step 1. Split the transcript into lines.
|
65
|
+
lines = transcript.split("\n")
|
66
|
+
|
67
|
+
# Step 2. Calculate the maximum chunk size in words.
|
68
|
+
max_total_tokens = 4000
|
69
|
+
response_token_reserve = 500
|
70
|
+
template_tokens = LINE_SUMMARY_PROMPT_TEMPLATE.map { |line| line[:content].split.size }.sum
|
71
|
+
max_chunk_tokens = max_total_tokens - response_token_reserve - template_tokens
|
72
|
+
words_per_token = 0.7
|
73
|
+
max_chunk_word_count = max_chunk_tokens * words_per_token
|
74
|
+
|
75
|
+
# Step 3. Split the transcript into equally sized chunks.
|
76
|
+
chunks = split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
|
77
|
+
|
78
|
+
# Step 4. Summarize each chunk.
|
79
|
+
previous_chunks_summary = ""
|
80
|
+
chunks.each_with_index do |chunk, chunk_index|
|
81
|
+
chunk_summary = summarize_chunk(chunk, chunk_index, chunks.size, previous_chunks_summary)
|
82
|
+
previous_chunks_summary += chunk_summary
|
83
|
+
end
|
84
|
+
|
85
|
+
# Step 5. Write a consolidated summary.
|
86
|
+
consolidated_template = CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE
|
87
|
+
prompt = Mustache.render(consolidated_template.to_json, { notes: previous_chunks_summary.to_json })
|
88
|
+
messages = JSON.parse(prompt)
|
89
|
+
response = SummarizeMeeting::Ai.client.chat(
|
90
|
+
parameters: {
|
91
|
+
model: "gpt-3.5-turbo",
|
92
|
+
messages: messages,
|
93
|
+
}
|
94
|
+
)
|
95
|
+
response.dig("choices", 0, "message", "content")
|
96
|
+
end
|
97
|
+
|
98
|
+
def summarize_chunk(chunk, chunk_index, chunk_count, previous_chunks_summary)
|
99
|
+
template = LINE_SUMMARY_PROMPT_TEMPLATE
|
100
|
+
prompt = Mustache.render(template.to_json, { chunkCount: chunk_count, chunkIndex: chunk_index + 1, chunk: chunk.join("\n").to_json })
|
101
|
+
messages = JSON.parse(prompt)
|
102
|
+
|
103
|
+
response = SummarizeMeeting::Ai.client.chat(
|
104
|
+
parameters: {
|
105
|
+
model: "gpt-3.5-turbo",
|
106
|
+
messages: messages,
|
107
|
+
}
|
108
|
+
)
|
109
|
+
response.dig("choices", 0, "message", "content")
|
110
|
+
end
|
111
|
+
|
112
|
+
def split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
|
113
|
+
chunks = []
|
114
|
+
chunk = []
|
115
|
+
chunk_word_count = 0
|
116
|
+
lines.each do |line|
|
117
|
+
line_word_count = line.split.size
|
118
|
+
if chunk_word_count + line_word_count > max_chunk_word_count
|
119
|
+
chunks << chunk
|
120
|
+
chunk = []
|
121
|
+
chunk_word_count = 0
|
122
|
+
end
|
123
|
+
chunk << line
|
124
|
+
chunk_word_count += line_word_count
|
125
|
+
end
|
126
|
+
chunks << chunk
|
127
|
+
chunks
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: summarize-meeting
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sean Devine
|
@@ -133,8 +133,10 @@ files:
|
|
133
133
|
- LICENSE.txt
|
134
134
|
- README.md
|
135
135
|
- bin/summarize-meeting
|
136
|
-
- lib/ai.rb
|
137
|
-
- lib/meeting.rb
|
136
|
+
- lib/summarize-meeting.rb
|
137
|
+
- lib/summarize-meeting/ai.rb
|
138
|
+
- lib/summarize-meeting/meeting.rb
|
139
|
+
- lib/summarize-meeting/version.rb
|
138
140
|
homepage:
|
139
141
|
licenses:
|
140
142
|
- MIT
|
@@ -142,7 +144,7 @@ metadata: {}
|
|
142
144
|
post_install_message:
|
143
145
|
rdoc_options: []
|
144
146
|
require_paths:
|
145
|
-
-
|
147
|
+
- lib
|
146
148
|
required_ruby_version: !ruby/object:Gem::Requirement
|
147
149
|
requirements:
|
148
150
|
- - ">="
|
data/lib/ai.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require "openai"
|
2
|
-
|
3
|
-
module Ai
|
4
|
-
@@access_token = ENV["OPENAI_KEY"]
|
5
|
-
@@organization_id = ENV["OPENAI_ORG"]
|
6
|
-
|
7
|
-
def self.client
|
8
|
-
OpenAI::Client.new(access_token: access_token, organization_id: organization_id)
|
9
|
-
end
|
10
|
-
|
11
|
-
def self.access_token
|
12
|
-
@@access_token
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.organization_id
|
16
|
-
@@organization_id
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.access_token=(token)
|
20
|
-
@@access_token = token
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.organization_id=(id)
|
24
|
-
@@organization_id = id
|
25
|
-
end
|
26
|
-
end
|
data/lib/meeting.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
require "json"
|
2
|
-
require "mustache"
|
3
|
-
require "openai"
|
4
|
-
|
5
|
-
require_relative "./ai"
|
6
|
-
|
7
|
-
class Meeting
|
8
|
-
LINE_SUMMARY_PROMPT_TEMPLATE = [
|
9
|
-
{
|
10
|
-
role: "system",
|
11
|
-
content: "You are an assistant summarizing a meeting.",
|
12
|
-
},
|
13
|
-
{
|
14
|
-
role: "system",
|
15
|
-
content: "The transcript of the meeting is split into {{chunkCount}} chunks. This is the {{chunkIndex}} chunk.",
|
16
|
-
},
|
17
|
-
{
|
18
|
-
role: "assistant",
|
19
|
-
content: "Please provide me with the next chunk of the transcript.",
|
20
|
-
},
|
21
|
-
{
|
22
|
-
role: "user",
|
23
|
-
content: "{{chunk}}",
|
24
|
-
}
|
25
|
-
]
|
26
|
-
|
27
|
-
CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE = [
|
28
|
-
{
|
29
|
-
role: "system",
|
30
|
-
content: "You are an assistant summarizing a meeting.",
|
31
|
-
},
|
32
|
-
{
|
33
|
-
role: "system",
|
34
|
-
content: "Notes about the meeting have been compiled.",
|
35
|
-
},
|
36
|
-
{
|
37
|
-
role: "system",
|
38
|
-
content: <<~CONTENT
|
39
|
-
Your job is to write a thorough summary of the meeting.
|
40
|
-
The summary should start with a brief overview of the meeting.
|
41
|
-
The summary should be detailed and should extract any action items that were discussed.
|
42
|
-
The summary should be organized into sections with headings and bullet points.
|
43
|
-
The summary should include a list of attendees.
|
44
|
-
The order of the sections should be overview, attendees, action items, and detailed notes by topic.
|
45
|
-
CONTENT
|
46
|
-
},
|
47
|
-
{
|
48
|
-
role: "assistant",
|
49
|
-
content: "Please provide me with notes from the meeting.",
|
50
|
-
},
|
51
|
-
{
|
52
|
-
role: "user",
|
53
|
-
content: "{{notes}}",
|
54
|
-
}
|
55
|
-
]
|
56
|
-
|
57
|
-
def initialize(transcript)
|
58
|
-
@transcript = transcript
|
59
|
-
end
|
60
|
-
|
61
|
-
attr_reader :transcript
|
62
|
-
|
63
|
-
def summarize
|
64
|
-
|
65
|
-
# Step 1. Split the transcript into lines.
|
66
|
-
lines = transcript.split("\n")
|
67
|
-
|
68
|
-
# Step 2. Calculate the maximum chunk size in words.
|
69
|
-
max_total_tokens = 4000
|
70
|
-
response_token_reserve = 500
|
71
|
-
template_tokens = LINE_SUMMARY_PROMPT_TEMPLATE.map { |line| line[:content].split.size }.sum
|
72
|
-
max_chunk_tokens = max_total_tokens - response_token_reserve - template_tokens
|
73
|
-
words_per_token = 0.7
|
74
|
-
max_chunk_word_count = max_chunk_tokens * words_per_token
|
75
|
-
|
76
|
-
# Step 3. Split the transcript into equally sized chunks.
|
77
|
-
chunks = split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
|
78
|
-
|
79
|
-
# Step 4. Summarize each chunk.
|
80
|
-
previous_chunks_summary = ""
|
81
|
-
chunks.each_with_index do |chunk, chunk_index|
|
82
|
-
chunk_summary = summarize_chunk(chunk, chunk_index, chunks.size, previous_chunks_summary)
|
83
|
-
previous_chunks_summary += chunk_summary
|
84
|
-
end
|
85
|
-
|
86
|
-
# Step 5. Write a consolidated summary.
|
87
|
-
consolidated_template = CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE
|
88
|
-
prompt = Mustache.render(consolidated_template.to_json, { notes: previous_chunks_summary.to_json })
|
89
|
-
messages = JSON.parse(prompt)
|
90
|
-
response = Ai.client.chat(
|
91
|
-
parameters: {
|
92
|
-
model: "gpt-3.5-turbo",
|
93
|
-
messages: messages,
|
94
|
-
}
|
95
|
-
)
|
96
|
-
response.dig("choices", 0, "message", "content")
|
97
|
-
end
|
98
|
-
|
99
|
-
def summarize_chunk(chunk, chunk_index, chunk_count, previous_chunks_summary)
|
100
|
-
template = LINE_SUMMARY_PROMPT_TEMPLATE
|
101
|
-
prompt = Mustache.render(template.to_json, { chunkCount: chunk_count, chunkIndex: chunk_index + 1, chunk: chunk.join("\n").to_json })
|
102
|
-
messages = JSON.parse(prompt)
|
103
|
-
|
104
|
-
response = Ai.client.chat(
|
105
|
-
parameters: {
|
106
|
-
model: "gpt-3.5-turbo",
|
107
|
-
messages: messages,
|
108
|
-
}
|
109
|
-
)
|
110
|
-
response.dig("choices", 0, "message", "content")
|
111
|
-
end
|
112
|
-
|
113
|
-
def split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
|
114
|
-
chunks = []
|
115
|
-
chunk = []
|
116
|
-
chunk_word_count = 0
|
117
|
-
lines.each do |line|
|
118
|
-
line_word_count = line.split.size
|
119
|
-
if chunk_word_count + line_word_count > max_chunk_word_count
|
120
|
-
chunks << chunk
|
121
|
-
chunk = []
|
122
|
-
chunk_word_count = 0
|
123
|
-
end
|
124
|
-
chunk << line
|
125
|
-
chunk_word_count += line_word_count
|
126
|
-
end
|
127
|
-
chunks << chunk
|
128
|
-
chunks
|
129
|
-
end
|
130
|
-
end
|