summarize-meeting 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/summarize-meeting +7 -8
- data/lib/summarize-meeting/ai.rb +28 -0
- data/lib/summarize-meeting/meeting.rb +130 -0
- data/lib/summarize-meeting/version.rb +3 -0
- data/lib/summarize-meeting.rb +6 -0
- metadata +6 -4
- data/lib/ai.rb +0 -26
- data/lib/meeting.rb +0 -130
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c14fc93afabfd456a2c3778dffccf9d2a66e77f2d47ab98b864986ad0e10cd2
|
4
|
+
data.tar.gz: 68ba17378b48f84ad029ef1d3d6e1575110739bb4c8f5c40e25a759350722cba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40a944d8e2289d02368c1f05c98ef3b26612f40c01c0aa761c110e7f46664b8411f11c7881152b32cc4aa3d1c4db20d726906fdc00ef69c5d362779cd5300e40
|
7
|
+
data.tar.gz: 6477d716ebf27e2b6b7a8a5e15c4ce2ec205e7f089b277002fb5f1ab5273d1e486f2248625571038475fac55e91d81fa77378094615727d0bccae970251b1f74
|
data/bin/summarize-meeting
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require "optparse"
|
4
|
-
|
5
|
-
require_relative "../lib/meeting"
|
4
|
+
require "summarize-meeting"
|
6
5
|
|
7
6
|
def main
|
8
7
|
options = {}
|
@@ -19,8 +18,8 @@ def main
|
|
19
18
|
options[:output_file] = file
|
20
19
|
end
|
21
20
|
|
22
|
-
if ENV["
|
23
|
-
options[:openai_key] = ENV["
|
21
|
+
if ENV["OPENAI_API_KEY"]
|
22
|
+
options[:openai_key] = ENV["OPENAI_API_KEY"]
|
24
23
|
end
|
25
24
|
|
26
25
|
opts.on("-k", "--openai-key KEY", "The OpenAI API key to use") do |key|
|
@@ -36,8 +35,8 @@ def main
|
|
36
35
|
end
|
37
36
|
end.parse!
|
38
37
|
|
39
|
-
Ai.access_token = options[:openai_key] if options[:openai_key]
|
40
|
-
Ai.organization_id = options[:openai_org] if options[:openai_org]
|
38
|
+
SummarizeMeeting::Ai.access_token = options[:openai_key] if options[:openai_key]
|
39
|
+
SummarizeMeeting::Ai.organization_id = options[:openai_org] if options[:openai_org]
|
41
40
|
|
42
41
|
if ARGV.length != 1
|
43
42
|
puts "Error: You must specify a transcript file to summarize."
|
@@ -47,7 +46,7 @@ def main
|
|
47
46
|
transcript_file = ARGV[0]
|
48
47
|
transcript = File.read(transcript_file)
|
49
48
|
|
50
|
-
meeting = Meeting.new(transcript)
|
49
|
+
meeting = SummarizeMeeting::Meeting.new(transcript)
|
51
50
|
summary = meeting.summarize
|
52
51
|
summary_file_name = if options[:output_file]
|
53
52
|
options[:output_file]
|
@@ -59,6 +58,6 @@ def main
|
|
59
58
|
File.write(summary_file_name, summary)
|
60
59
|
end
|
61
60
|
|
62
|
-
if __FILE__
|
61
|
+
if __FILE__.to_s.end_with?("summarize-meeting") && $0.to_s.end_with?("summarize-meeting")
|
63
62
|
main
|
64
63
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "openai"

module SummarizeMeeting
  # Holds the OpenAI credentials for the gem and builds API clients.
  #
  # Credentials default to the OPENAI_KEY / OPENAI_ORG environment
  # variables; the CLI (bin/summarize-meeting) may override them via the
  # writers below (it also honors OPENAI_API_KEY — note the env-var name
  # mismatch with the default here; TODO confirm which is canonical).
  module Ai
    # Module instance variables instead of @@ class variables: @@ state is
    # shared across the whole inheritance tree and is a well-known Ruby
    # anti-pattern. The public interface is unchanged.
    @access_token = ENV["OPENAI_KEY"]
    @organization_id = ENV["OPENAI_ORG"]

    class << self
      # Reader/writer pairs replace the four hand-written singleton methods.
      attr_accessor :access_token, :organization_id

      # Returns a new OpenAI::Client configured with the current credentials.
      # A fresh client is built on every call so credential changes take
      # effect immediately.
      def client
        OpenAI::Client.new(access_token: access_token, organization_id: organization_id)
      end
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require "json"
require "mustache"
require "openai"

module SummarizeMeeting
  # Summarizes a meeting transcript with the OpenAI chat API.
  #
  # The transcript is split into word-budgeted chunks, each chunk is
  # summarized in turn, and the accumulated notes are then condensed into a
  # single consolidated summary.
  class Meeting
    # Chat model used for every completion request.
    MODEL = "gpt-3.5-turbo"

    # Per-chunk prompt. {{{...}}} (triple mustache) is used for the chunk
    # body so Mustache does not HTML-escape the transcript text. Frozen:
    # mutable constants should not be modifiable at runtime.
    LINE_SUMMARY_PROMPT_TEMPLATE = [
      {
        role: "system",
        content: "You are an assistant summarizing a meeting.",
      },
      {
        role: "system",
        content: "The transcript of the meeting is split into {{chunkCount}} chunks. This is the {{chunkIndex}} chunk.",
      },
      {
        role: "assistant",
        content: "Please provide me with the next chunk of the transcript.",
      },
      {
        role: "user",
        content: "{{{chunk}}}",
      }
    ].freeze

    # Final consolidation prompt; receives the concatenated chunk notes.
    CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE = [
      {
        role: "system",
        content: "You are an assistant summarizing a meeting.",
      },
      {
        role: "system",
        content: "Notes about the meeting have been compiled.",
      },
      {
        role: "system",
        content: <<~CONTENT
          Your job is to write a thorough summary of the meeting.
          The summary should start with a brief overview of the meeting.
          The summary should be detailed and should extract any action items that were discussed.
          The summary should be organized into sections with headings and bullet points.
          The summary should include a list of attendees.
          The order of the sections should be overview, attendees, action items, and detailed notes by topic.
        CONTENT
      },
      {
        role: "assistant",
        content: "Please provide me with notes from the meeting.",
      },
      {
        role: "user",
        content: "{{{notes}}}",
      }
    ].freeze

    # @param transcript [String] raw meeting transcript, one utterance per line
    def initialize(transcript)
      @transcript = transcript
    end

    attr_reader :transcript

    # Produces the consolidated meeting summary.
    # @return [String, nil] the model's summary text
    def summarize
      # Step 1. Split the transcript into lines.
      lines = transcript.split("\n")

      # Step 2/3. Split the transcript into word-budgeted chunks.
      chunks = split_lines_into_equal_size_chunks(lines, max_chunk_word_count)

      # Step 4. Summarize each chunk, accumulating notes as we go.
      notes = ""
      chunks.each_with_index do |chunk, chunk_index|
        notes += summarize_chunk(chunk, chunk_index, chunks.size, notes)
      end

      # Step 5. Write a consolidated summary from the accumulated notes.
      chat(render_messages(CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE, notes: notes))
    end

    # Summarizes a single chunk of the transcript.
    # previous_chunks_summary is currently unused but kept for caller
    # compatibility (existing code passes four arguments).
    # @return [String, nil] the model's notes for this chunk
    def summarize_chunk(chunk, chunk_index, chunk_count, previous_chunks_summary = "")
      messages = render_messages(
        LINE_SUMMARY_PROMPT_TEMPLATE,
        chunkCount: chunk_count,
        chunkIndex: chunk_index + 1,
        chunk: chunk.join("\n")
      )
      chat(messages)
    end

    # Greedily packs lines into chunks of at most max_chunk_word_count words.
    # A line longer than the budget still becomes its own chunk.
    # @return [Array<Array<String>>]
    def split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
      chunks = []
      chunk = []
      chunk_word_count = 0
      lines.each do |line|
        line_word_count = line.split.size
        if chunk_word_count + line_word_count > max_chunk_word_count
          chunks << chunk
          chunk = []
          chunk_word_count = 0
        end
        chunk << line
        chunk_word_count += line_word_count
      end
      # Guard: don't emit a trailing empty chunk for an empty transcript.
      chunks << chunk unless chunk.empty?
      chunks
    end

    private

    # Renders a prompt template into chat messages by interpolating each
    # message's content directly with Mustache. This replaces the previous
    # template.to_json / value.to_json / JSON.parse round-trip, which
    # double-encoded the interpolated text (JSON-quoted *and* HTML-escaped)
    # and could yield invalid JSON.
    def render_messages(template, vars)
      template.map do |message|
        { role: message[:role], content: Mustache.render(message[:content], vars) }
      end
    end

    # Sends one chat completion request and extracts the reply text.
    def chat(messages)
      response = SummarizeMeeting::Ai.client.chat(
        parameters: {
          model: MODEL,
          messages: messages,
        }
      )
      response.dig("choices", 0, "message", "content")
    end

    # Word budget per chunk: total context minus a response reserve and the
    # template's own (word-counted) overhead, scaled by a rough
    # words-per-token heuristic.
    def max_chunk_word_count
      max_total_tokens = 4000
      response_token_reserve = 500
      template_tokens = LINE_SUMMARY_PROMPT_TEMPLATE.sum { |line| line[:content].split.size }
      max_chunk_tokens = max_total_tokens - response_token_reserve - template_tokens
      words_per_token = 0.7
      max_chunk_tokens * words_per_token
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: summarize-meeting
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sean Devine
|
@@ -133,8 +133,10 @@ files:
|
|
133
133
|
- LICENSE.txt
|
134
134
|
- README.md
|
135
135
|
- bin/summarize-meeting
|
136
|
-
- lib/
|
137
|
-
- lib/meeting.rb
|
136
|
+
- lib/summarize-meeting.rb
|
137
|
+
- lib/summarize-meeting/ai.rb
|
138
|
+
- lib/summarize-meeting/meeting.rb
|
139
|
+
- lib/summarize-meeting/version.rb
|
138
140
|
homepage:
|
139
141
|
licenses:
|
140
142
|
- MIT
|
@@ -142,7 +144,7 @@ metadata: {}
|
|
142
144
|
post_install_message:
|
143
145
|
rdoc_options: []
|
144
146
|
require_paths:
|
145
|
-
-
|
147
|
+
- lib
|
146
148
|
required_ruby_version: !ruby/object:Gem::Requirement
|
147
149
|
requirements:
|
148
150
|
- - ">="
|
data/lib/ai.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require "openai"
|
2
|
-
|
3
|
-
module Ai
|
4
|
-
@@access_token = ENV["OPENAI_KEY"]
|
5
|
-
@@organization_id = ENV["OPENAI_ORG"]
|
6
|
-
|
7
|
-
def self.client
|
8
|
-
OpenAI::Client.new(access_token: access_token, organization_id: organization_id)
|
9
|
-
end
|
10
|
-
|
11
|
-
def self.access_token
|
12
|
-
@@access_token
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.organization_id
|
16
|
-
@@organization_id
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.access_token=(token)
|
20
|
-
@@access_token = token
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.organization_id=(id)
|
24
|
-
@@organization_id = id
|
25
|
-
end
|
26
|
-
end
|
data/lib/meeting.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
require "json"
|
2
|
-
require "mustache"
|
3
|
-
require "openai"
|
4
|
-
|
5
|
-
require_relative "./ai"
|
6
|
-
|
7
|
-
class Meeting
|
8
|
-
LINE_SUMMARY_PROMPT_TEMPLATE = [
|
9
|
-
{
|
10
|
-
role: "system",
|
11
|
-
content: "You are an assistant summarizing a meeting.",
|
12
|
-
},
|
13
|
-
{
|
14
|
-
role: "system",
|
15
|
-
content: "The transcript of the meeting is split into {{chunkCount}} chunks. This is the {{chunkIndex}} chunk.",
|
16
|
-
},
|
17
|
-
{
|
18
|
-
role: "assistant",
|
19
|
-
content: "Please provide me with the next chunk of the transcript.",
|
20
|
-
},
|
21
|
-
{
|
22
|
-
role: "user",
|
23
|
-
content: "{{chunk}}",
|
24
|
-
}
|
25
|
-
]
|
26
|
-
|
27
|
-
CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE = [
|
28
|
-
{
|
29
|
-
role: "system",
|
30
|
-
content: "You are an assistant summarizing a meeting.",
|
31
|
-
},
|
32
|
-
{
|
33
|
-
role: "system",
|
34
|
-
content: "Notes about the meeting have been compiled.",
|
35
|
-
},
|
36
|
-
{
|
37
|
-
role: "system",
|
38
|
-
content: <<~CONTENT
|
39
|
-
Your job is to write a thorough summary of the meeting.
|
40
|
-
The summary should start with a brief overview of the meeting.
|
41
|
-
The summary should be detailed and should extract any action items that were discussed.
|
42
|
-
The summary should be organized into sections with headings and bullet points.
|
43
|
-
The summary should include a list of attendees.
|
44
|
-
The order of the sections should be overview, attendees, action items, and detailed notes by topic.
|
45
|
-
CONTENT
|
46
|
-
},
|
47
|
-
{
|
48
|
-
role: "assistant",
|
49
|
-
content: "Please provide me with notes from the meeting.",
|
50
|
-
},
|
51
|
-
{
|
52
|
-
role: "user",
|
53
|
-
content: "{{notes}}",
|
54
|
-
}
|
55
|
-
]
|
56
|
-
|
57
|
-
def initialize(transcript)
|
58
|
-
@transcript = transcript
|
59
|
-
end
|
60
|
-
|
61
|
-
attr_reader :transcript
|
62
|
-
|
63
|
-
def summarize
|
64
|
-
|
65
|
-
# Step 1. Split the transcript into lines.
|
66
|
-
lines = transcript.split("\n")
|
67
|
-
|
68
|
-
# Step 2. Calculate the maximum chunk size in words.
|
69
|
-
max_total_tokens = 4000
|
70
|
-
response_token_reserve = 500
|
71
|
-
template_tokens = LINE_SUMMARY_PROMPT_TEMPLATE.map { |line| line[:content].split.size }.sum
|
72
|
-
max_chunk_tokens = max_total_tokens - response_token_reserve - template_tokens
|
73
|
-
words_per_token = 0.7
|
74
|
-
max_chunk_word_count = max_chunk_tokens * words_per_token
|
75
|
-
|
76
|
-
# Step 3. Split the transcript into equally sized chunks.
|
77
|
-
chunks = split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
|
78
|
-
|
79
|
-
# Step 4. Summarize each chunk.
|
80
|
-
previous_chunks_summary = ""
|
81
|
-
chunks.each_with_index do |chunk, chunk_index|
|
82
|
-
chunk_summary = summarize_chunk(chunk, chunk_index, chunks.size, previous_chunks_summary)
|
83
|
-
previous_chunks_summary += chunk_summary
|
84
|
-
end
|
85
|
-
|
86
|
-
# Step 5. Write a consolidated summary.
|
87
|
-
consolidated_template = CONSOLIDATED_SUMMARY_PROMPT_TEMPLATE
|
88
|
-
prompt = Mustache.render(consolidated_template.to_json, { notes: previous_chunks_summary.to_json })
|
89
|
-
messages = JSON.parse(prompt)
|
90
|
-
response = Ai.client.chat(
|
91
|
-
parameters: {
|
92
|
-
model: "gpt-3.5-turbo",
|
93
|
-
messages: messages,
|
94
|
-
}
|
95
|
-
)
|
96
|
-
response.dig("choices", 0, "message", "content")
|
97
|
-
end
|
98
|
-
|
99
|
-
def summarize_chunk(chunk, chunk_index, chunk_count, previous_chunks_summary)
|
100
|
-
template = LINE_SUMMARY_PROMPT_TEMPLATE
|
101
|
-
prompt = Mustache.render(template.to_json, { chunkCount: chunk_count, chunkIndex: chunk_index + 1, chunk: chunk.join("\n").to_json })
|
102
|
-
messages = JSON.parse(prompt)
|
103
|
-
|
104
|
-
response = Ai.client.chat(
|
105
|
-
parameters: {
|
106
|
-
model: "gpt-3.5-turbo",
|
107
|
-
messages: messages,
|
108
|
-
}
|
109
|
-
)
|
110
|
-
response.dig("choices", 0, "message", "content")
|
111
|
-
end
|
112
|
-
|
113
|
-
def split_lines_into_equal_size_chunks(lines, max_chunk_word_count)
|
114
|
-
chunks = []
|
115
|
-
chunk = []
|
116
|
-
chunk_word_count = 0
|
117
|
-
lines.each do |line|
|
118
|
-
line_word_count = line.split.size
|
119
|
-
if chunk_word_count + line_word_count > max_chunk_word_count
|
120
|
-
chunks << chunk
|
121
|
-
chunk = []
|
122
|
-
chunk_word_count = 0
|
123
|
-
end
|
124
|
-
chunk << line
|
125
|
-
chunk_word_count += line_word_count
|
126
|
-
end
|
127
|
-
chunks << chunk
|
128
|
-
chunks
|
129
|
-
end
|
130
|
-
end
|