langchainrb 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -2
- data/lib/langchain/assistants/assistant.rb +75 -20
- data/lib/langchain/assistants/messages/base.rb +16 -0
- data/lib/langchain/assistants/messages/google_gemini_message.rb +90 -0
- data/lib/langchain/assistants/messages/openai_message.rb +74 -0
- data/lib/langchain/assistants/thread.rb +5 -5
- data/lib/langchain/llm/anthropic.rb +27 -49
- data/lib/langchain/llm/aws_bedrock.rb +30 -34
- data/lib/langchain/llm/azure.rb +6 -0
- data/lib/langchain/llm/base.rb +20 -1
- data/lib/langchain/llm/cohere.rb +38 -6
- data/lib/langchain/llm/google_gemini.rb +67 -0
- data/lib/langchain/llm/google_vertex_ai.rb +68 -112
- data/lib/langchain/llm/mistral_ai.rb +10 -19
- data/lib/langchain/llm/ollama.rb +23 -27
- data/lib/langchain/llm/openai.rb +20 -48
- data/lib/langchain/llm/parameters/chat.rb +51 -0
- data/lib/langchain/llm/response/base_response.rb +2 -2
- data/lib/langchain/llm/response/cohere_response.rb +16 -0
- data/lib/langchain/llm/response/google_gemini_response.rb +45 -0
- data/lib/langchain/llm/response/openai_response.rb +5 -1
- data/lib/langchain/llm/unified_parameters.rb +98 -0
- data/lib/langchain/loader.rb +6 -0
- data/lib/langchain/tool/base.rb +16 -6
- data/lib/langchain/tool/calculator/calculator.json +1 -1
- data/lib/langchain/tool/database/database.json +3 -3
- data/lib/langchain/tool/file_system/file_system.json +3 -3
- data/lib/langchain/tool/news_retriever/news_retriever.json +121 -0
- data/lib/langchain/tool/news_retriever/news_retriever.rb +132 -0
- data/lib/langchain/tool/ruby_code_interpreter/ruby_code_interpreter.json +1 -1
- data/lib/langchain/tool/vectorsearch/vectorsearch.json +1 -1
- data/lib/langchain/tool/weather/weather.json +1 -1
- data/lib/langchain/tool/wikipedia/wikipedia.json +1 -1
- data/lib/langchain/tool/wikipedia/wikipedia.rb +2 -2
- data/lib/langchain/utils/token_length/openai_validator.rb +6 -1
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +3 -0
- metadata +22 -15
- data/lib/langchain/assistants/message.rb +0 -58
- data/lib/langchain/llm/response/google_vertex_ai_response.rb +0 -33
@@ -45,14 +45,14 @@ module Langchain
|
|
45
45
|
|
46
46
|
# Return the completion candidates
|
47
47
|
#
|
48
|
-
# @return [Array]
|
48
|
+
# @return [Array<String>]
|
49
49
|
def completions
|
50
50
|
raise NotImplementedError
|
51
51
|
end
|
52
52
|
|
53
53
|
# Return the chat completion candidates
|
54
54
|
#
|
55
|
-
# @return [Array]
|
55
|
+
# @return [Array<String>]
|
56
56
|
def chat_completions
|
57
57
|
raise NotImplementedError
|
58
58
|
end
|
@@ -17,5 +17,21 @@ module Langchain::LLM
|
|
17
17
|
def completion
|
18
18
|
completions&.dig(0, "text")
|
19
19
|
end
|
20
|
+
|
21
|
+
def chat_completion
|
22
|
+
raw_response.dig("text")
|
23
|
+
end
|
24
|
+
|
25
|
+
# Role recorded on the most recent entry of the response's "chat_history".
#
# A single +dig+ walks the whole path, so a response that has no
# "chat_history" key (or an empty history array) yields nil instead of
# raising NoMethodError on +nil.last+ / +nil["role"]+.
#
# @return [String, nil] the "role" of the last chat history entry, if any
def role
  raw_response.dig("chat_history", -1, "role")
end
|
28
|
+
|
29
|
+
def prompt_tokens
|
30
|
+
raw_response.dig("meta", "billed_units", "input_tokens")
|
31
|
+
end
|
32
|
+
|
33
|
+
def completion_tokens
|
34
|
+
raw_response.dig("meta", "billed_units", "output_tokens")
|
35
|
+
end
|
20
36
|
end
|
21
37
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::LLM
|
4
|
+
# Accessors over the raw Hash returned by a Google Gemini call.
#
# Every reader walks the raw response with +dig+, so a missing key along
# the path yields nil rather than raising.
class GoogleGeminiResponse < BaseResponse
  # @param raw_response [Hash] response body as returned by the API client
  # @param model [String, nil] forwarded unchanged to BaseResponse
  def initialize(raw_response, model: nil)
    super(raw_response, model: model)
  end

  # @return [String, nil] "text" of the first part of the first candidate
  def chat_completion
    raw_response.dig("candidates", 0, "content", "parts", 0, "text")
  end

  # @return [String, nil] "role" of the first candidate's content
  def role
    raw_response.dig("candidates", 0, "content", "role")
  end

  # Parts of the first candidate when its first part is a function call.
  #
  # Returns an empty array when there is no candidate content, when "parts"
  # is missing or empty, or when the first part has no "functionCall" key.
  # (Previously a content hash without parts raised NoMethodError on nil.)
  #
  # @return [Array<Hash>] all parts of the first candidate, or []
  def tool_calls
    parts = raw_response.dig("candidates", 0, "content", "parts")
    return [] unless parts&.first&.key?("functionCall")

    parts
  end

  # @return [Array, nil] the first (and only) entry of #embeddings
  def embedding
    embeddings.first
  end

  # @return [Array] one-element array wrapping predictions[0].embeddings.values
  def embeddings
    [raw_response.dig("predictions", 0, "embeddings", "values")]
  end

  # @return [Integer, nil] usageMetadata.promptTokenCount
  def prompt_tokens
    raw_response.dig("usageMetadata", "promptTokenCount")
  end

  # @return [Integer, nil] usageMetadata.candidatesTokenCount
  def completion_tokens
    raw_response.dig("usageMetadata", "candidatesTokenCount")
  end

  # @return [Integer, nil] usageMetadata.totalTokenCount
  def total_tokens
    raw_response.dig("usageMetadata", "totalTokenCount")
  end
end
|
45
|
+
end
|
@@ -25,7 +25,11 @@ module Langchain::LLM
|
|
25
25
|
end
|
26
26
|
|
27
27
|
# Tool calls requested by the first chat completion choice.
#
# Nil-safe: a response with no choices, a choice without a "message", or a
# message whose "tool_calls" is absent or null all yield an empty array
# instead of raising NoMethodError or leaking nil to the caller.
#
# @return [Array<Hash>] the "tool_calls" entries of the first message, or []
def tool_calls
  chat_completions&.dig(0, "message", "tool_calls") || []
end
|
30
34
|
|
31
35
|
def embedding
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::LLM
|
4
|
+
# Normalizes a hash of LLM call parameters against a schema.
#
# The schema maps a canonical field name to an options hash (or nil) that may
# carry +:aliases+ (alternative incoming key names) and +:default+. On top of
# the schema, fields can be ignored (dropped from the output) or remapped
# (emitted under a different key).
class UnifiedParameters
  include Enumerable

  attr_reader :schema, :aliases, :parameters, :ignored, :remapped

  # Variant with an empty schema; every incoming key is therefore sliced away.
  class Null < self
    def initialize(parameters: {})
      super(schema: {}, parameters: parameters)
    end
  end

  # @param schema [Hash, nil] field name => options (+:aliases+, +:default+)
  # @param parameters [#to_h] initial parameters; processed only when non-empty
  def initialize(schema:, parameters: {})
    @schema = schema || {}
    @aliases = {}
    @remapped = {}
    @ignored = Set.new
    @schema.each { |name, param| register_aliases(name, param) }
    @parameters = to_params(parameters.to_h) if !parameters.to_h.empty?
  end

  # Builds (and stores) the normalized parameter hash.
  #
  # Steps, in order: keep only schema keys; fill fields from their aliases;
  # fill defaults; copy remapped fields to their new names; drop ignored
  # fields and the original names of remapped fields.
  #
  # An explicitly supplied +false+ is kept: aliases and defaults only fill
  # fields whose current value is nil.
  #
  # @param params [Hash] when non-empty, replaces previously stored parameters
  # @return [Hash] the normalized parameters
  def to_params(params = {})
    # if params are provided, reset any previously initialized
    base = params.empty? ? (@parameters || {}) : params
    @parameters = base.slice(*schema.keys)

    @aliases.each do |field, aliased_keys|
      # favor existing keys in case of conflicts; the first alias carrying
      # a present value wins
      aliased_keys.each do |alias_key|
        candidate = params[alias_key]
        @parameters[field] = candidate if @parameters[field].nil? && value_present?(candidate)
      end
    end

    @schema.each do |field, param_options|
      default = (param_options || {})[:default]
      @parameters[field] = default if @parameters[field].nil? && value_present?(default)
    end

    @remapped.each do |field, renamed_field|
      @parameters[renamed_field] = @parameters[field] if value_present?(@parameters[field])
    end

    @parameters = @parameters.except(*@ignored, *@remapped.keys)
  end

  # Registers output renames and adds each new name to the schema so it
  # survives the schema slice in #to_params.
  #
  # @param field_map [Hash] original field name => emitted field name
  def remap(field_map)
    @remapped ||= {}
    @remapped.merge!(field_map)
    field_map.each do |field, renamed_field|
      @schema[renamed_field] = @schema[field]
    end
  end

  # Merges additional schema entries and registers their aliases.
  #
  # Aliases are merged element by element (a single name or a list are both
  # accepted), matching what #initialize does; previously the whole list was
  # stored as one set member and therefore never matched an incoming key.
  #
  # @param schema [Hash] field name => options
  # @return [self]
  def update(schema = {})
    @schema.merge!(schema)
    schema.each { |name, param| register_aliases(name, param) }
    self
  end

  # @param field_names [Array] fields to drop from the output of #to_params
  def ignore(*field_names)
    @ignored.merge(field_names)
  end

  # @param field_name [Object] canonical field name
  # @param as [Object] additional incoming key accepted for that field
  def alias_field(field_name, as:)
    @aliases[field_name] ||= Set.new
    @aliases[field_name] << as
  end

  # @return [Hash] the stored parameters ({} when none were processed yet)
  def to_h
    @parameters.to_h
  end

  # Iterates over the key/value pairs of a freshly computed #to_params.
  def each(&)
    to_params.each(&)
  end

  # Delegates to +<=>+ on the two normalized hashes.
  def <=>(other)
    to_params.<=>(other.to_params)
  end

  # @return [Object, nil] value stored under +key+ in a freshly computed #to_params
  def [](key)
    to_params[key]
  end

  private

  # Adds the aliases declared in a schema entry to the alias set of +name+.
  # Tolerates nil schema entries, like #to_params does.
  def register_aliases(name, param)
    declared = (param || {})[:aliases]
    return unless declared

    (@aliases[name] ||= Set.new).merge(Array(declared))
  end

  # nil and empty Enumerables count as absent; everything else (including
  # false and empty strings) counts as present.
  def value_present?(value)
    !value.nil? && (!value.is_a?(Enumerable) || !value.empty?)
  end
end
|
98
|
+
end
|
data/lib/langchain/loader.rb
CHANGED
@@ -29,9 +29,11 @@ module Langchain
|
|
29
29
|
# @param path [String | Pathname] path to file or URL
|
30
30
|
# @param options [Hash] options passed to the processor class used to process the data
|
31
31
|
# @return [Data] data loaded from path
|
32
|
+
# rubocop:disable Style/ArgumentsForwarding
|
32
33
|
def self.load(path, options = {}, &block)
|
33
34
|
new(path, options).load(&block)
|
34
35
|
end
|
36
|
+
# rubocop:enable Style/ArgumentsForwarding
|
35
37
|
|
36
38
|
# Initialize Langchain::Loader
|
37
39
|
# @param path [String | Pathname] path to file or URL
|
@@ -76,12 +78,14 @@ module Langchain
|
|
76
78
|
# @yieldreturn [String] parsed data, as a String
|
77
79
|
#
|
78
80
|
# @return [Data] data that was loaded
|
81
|
+
# rubocop:disable Style/ArgumentsForwarding
|
79
82
|
def load(&block)
|
80
83
|
return process_data(load_from_url, &block) if url?
|
81
84
|
return load_from_directory(&block) if directory?
|
82
85
|
|
83
86
|
process_data(load_from_path, &block)
|
84
87
|
end
|
88
|
+
# rubocop:enable Style/ArgumentsForwarding
|
85
89
|
|
86
90
|
private
|
87
91
|
|
@@ -95,6 +99,7 @@ module Langchain
|
|
95
99
|
raise FileNotFound, "File #{@path} does not exist"
|
96
100
|
end
|
97
101
|
|
102
|
+
# rubocop:disable Style/ArgumentsForwarding
|
98
103
|
def load_from_directory(&block)
|
99
104
|
Dir.glob(File.join(@path, "**/*")).map do |file|
|
100
105
|
# Only load and add to result files with supported extensions
|
@@ -103,6 +108,7 @@ module Langchain
|
|
103
108
|
UnknownFormatError nil
|
104
109
|
end.flatten.compact
|
105
110
|
end
|
111
|
+
# rubocop:enable Style/ArgumentsForwarding
|
106
112
|
|
107
113
|
def process_data(data, &block)
|
108
114
|
@raw_data = data
|
data/lib/langchain/tool/base.rb
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
module Langchain::Tool
|
4
4
|
# = Tools
|
5
5
|
#
|
6
|
-
# Tools are used by Agents to perform specific tasks.
|
6
|
+
# Tools are used by Agents to perform specific tasks. A 'Tool' is a collection of functions ("methods").
|
7
7
|
#
|
8
8
|
# == Available Tools
|
9
9
|
#
|
10
10
|
# - {Langchain::Tool::Calculator}: calculate the result of a math expression
|
11
11
|
# - {Langchain::Tool::Database}: executes SQL queries
|
12
|
-
# - {Langchain::Tool::FileSystem}: interacts with
|
12
|
+
# - {Langchain::Tool::FileSystem}: interacts with the file system
|
13
13
|
# - {Langchain::Tool::GoogleSearch}: search on Google (via SerpAPI)
|
14
14
|
# - {Langchain::Tool::RubyCodeInterpreter}: runs ruby code
|
15
15
|
# - {Langchain::Tool::Weather}: gets current weather data
|
@@ -42,10 +42,10 @@ module Langchain::Tool
|
|
42
42
|
#
|
43
43
|
# == Adding Tools
|
44
44
|
#
|
45
|
-
# 1. Create a new
|
46
|
-
# 2.
|
45
|
+
# 1. Create a new folder in lib/langchain/tool/your_tool_name/
|
46
|
+
# 2. Inside of this folder create a file with a class YourToolName that inherits from {Langchain::Tool::Base}
|
47
47
|
# 3. Add `NAME=` and `ANNOTATIONS_PATH=` constants in your Tool class
|
48
|
-
# 4. Implement various methods in your tool class
|
48
|
+
# 4. Implement various public methods in your tool class
|
49
49
|
# 5. Create a sidecar .json file in the same directory as your tool file annotating the methods in the Open API format
|
50
50
|
# 6. Add your tool to the {file:README.md}
|
51
51
|
class Base
|
@@ -66,11 +66,21 @@ module Langchain::Tool
|
|
66
66
|
|
67
67
|
# Returns the tool as a list of OpenAI formatted functions
|
68
68
|
#
|
69
|
-
# @return [Hash] tool as
|
69
|
+
# @return [Array<Hash>] List of hashes representing the tool as OpenAI formatted functions
|
70
70
|
def to_openai_tools
|
71
71
|
method_annotations
|
72
72
|
end
|
73
73
|
|
74
|
+
# Returns the tool as a list of Google Gemini formatted functions
|
75
|
+
#
|
76
|
+
# @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions
|
77
|
+
def to_google_gemini_tools
|
78
|
+
method_annotations.map do |annotation|
|
79
|
+
# Slice out only the content of the "function" key
|
80
|
+
annotation["function"]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
74
84
|
# Return tool's method annotations as JSON
|
75
85
|
#
|
76
86
|
# @return [Hash] Tool's method annotations
|
@@ -2,7 +2,7 @@
|
|
2
2
|
{
|
3
3
|
"type": "function",
|
4
4
|
"function": {
|
5
|
-
"name": "
|
5
|
+
"name": "calculator__execute",
|
6
6
|
"description": "Evaluates a pure math expression or if equation contains non-math characters (e.g.: \"12F in Celsius\") then it uses the google search calculator to evaluate the expression",
|
7
7
|
"parameters": {
|
8
8
|
"type": "object",
|
@@ -2,7 +2,7 @@
|
|
2
2
|
{
|
3
3
|
"type": "function",
|
4
4
|
"function": {
|
5
|
-
"name": "
|
5
|
+
"name": "database__describe_tables",
|
6
6
|
"description": "Database Tool: Returns the schema for a list of tables",
|
7
7
|
"parameters": {
|
8
8
|
"type": "object",
|
@@ -18,7 +18,7 @@
|
|
18
18
|
}, {
|
19
19
|
"type": "function",
|
20
20
|
"function": {
|
21
|
-
"name": "
|
21
|
+
"name": "database__list_tables",
|
22
22
|
"description": "Database Tool: Returns a list of tables in the database",
|
23
23
|
"parameters": {
|
24
24
|
"type": "object",
|
@@ -29,7 +29,7 @@
|
|
29
29
|
}, {
|
30
30
|
"type": "function",
|
31
31
|
"function": {
|
32
|
-
"name": "
|
32
|
+
"name": "database__execute",
|
33
33
|
"description": "Database Tool: Executes a SQL query and returns the results",
|
34
34
|
"parameters": {
|
35
35
|
"type": "object",
|
@@ -2,7 +2,7 @@
|
|
2
2
|
{
|
3
3
|
"type": "function",
|
4
4
|
"function": {
|
5
|
-
"name": "
|
5
|
+
"name": "file_system__list_directory",
|
6
6
|
"description": "File System Tool: Lists out the content of a specified directory",
|
7
7
|
"parameters": {
|
8
8
|
"type": "object",
|
@@ -19,7 +19,7 @@
|
|
19
19
|
{
|
20
20
|
"type": "function",
|
21
21
|
"function": {
|
22
|
-
"name": "
|
22
|
+
"name": "file_system__read_file",
|
23
23
|
"description": "File System Tool: Reads the contents of a file",
|
24
24
|
"parameters": {
|
25
25
|
"type": "object",
|
@@ -36,7 +36,7 @@
|
|
36
36
|
{
|
37
37
|
"type": "function",
|
38
38
|
"function": {
|
39
|
-
"name": "
|
39
|
+
"name": "file_system__write_to_file",
|
40
40
|
"description": "File System Tool: Write content to a file",
|
41
41
|
"parameters": {
|
42
42
|
"type": "object",
|
@@ -0,0 +1,121 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"type": "function",
|
4
|
+
"function": {
|
5
|
+
"name": "news_retriever__get_everything",
|
6
|
+
"description": "News Retriever: Search through millions of articles from over 150,000 large and small news sources and blogs.",
|
7
|
+
"parameters": {
|
8
|
+
"type": "object",
|
9
|
+
"properties": {
|
10
|
+
"q": {
|
11
|
+
"type": "string",
|
12
|
+
"description": "Keywords or phrases to search for in the article title and body. Surround phrases with quotes (\") for exact match. Alternatively you can use the AND / OR / NOT keywords, and optionally group these with parenthesis. Pass the plain text; the tool URL-encodes it when building the request."
|
13
|
+
},
|
14
|
+
"search_in": {
|
15
|
+
"type": "string",
|
16
|
+
"description": "The fields to restrict your q search to.",
|
17
|
+
"enum": ["title", "description", "content"]
|
18
|
+
},
|
19
|
+
"sources": {
|
20
|
+
"type": "string",
|
21
|
+
"description": "A comma-separated string of identifiers (maximum 20) for the news sources or blogs you want headlines from. Use the /sources endpoint to locate these programmatically or look at the sources index."
|
22
|
+
},
|
23
|
+
"domains": {
|
24
|
+
"type": "string",
|
25
|
+
"description": "A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to."
|
26
|
+
},
|
27
|
+
"exclude_domains": {
|
28
|
+
"type": "string",
|
29
|
+
"description": "A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to remove from the results."
|
30
|
+
},
|
31
|
+
"from": {
|
32
|
+
"type": "string",
|
33
|
+
"description": "A date and optional time for the oldest article allowed. This should be in ISO 8601 format."
|
34
|
+
},
|
35
|
+
"to": {
|
36
|
+
"type": "string",
|
37
|
+
"description": "A date and optional time for the newest article allowed. This should be in ISO 8601 format."
|
38
|
+
},
|
39
|
+
"language": {
|
40
|
+
"type": "string",
|
41
|
+
"description": "The 2-letter ISO-639-1 code of the language you want to get headlines for.",
|
42
|
+
"enum": ["ar", "de", "en", "es", "fr", "he", "it", "nl", "no", "pt", "ru", "sv", "ud", "zh"]
|
43
|
+
},
|
44
|
+
"sort_by": {
|
45
|
+
"type": "string",
|
46
|
+
"description": "The order to sort the articles in.",
|
47
|
+
"enum": ["relevancy", "popularity", "publishedAt"]
|
48
|
+
},
|
49
|
+
"page_size": {
|
50
|
+
"type": "integer",
|
51
|
+
"description": "The number of results to return per page (request). 5 is the default, 100 is the maximum."
|
52
|
+
},
|
53
|
+
"page": {
|
54
|
+
"type": "integer",
|
55
|
+
"description": "Use this to page through the results if the total results found is greater than the page size."
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
}
|
60
|
+
},
|
61
|
+
{
|
62
|
+
"type": "function",
|
63
|
+
"function": {
|
64
|
+
"name": "news_retriever__get_top_headlines",
|
65
|
+
"description": "News Retriever: Provides live top and breaking headlines for a country, specific category in a country, single source, or multiple sources. You can also search with keywords. Articles are sorted by the earliest date published first.",
|
66
|
+
"parameters": {
|
67
|
+
"type": "object",
|
68
|
+
"properties": {
|
69
|
+
"country": {
|
70
|
+
"type": "string",
|
71
|
+
"description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
|
72
|
+
},
|
73
|
+
"category": {
|
74
|
+
"type": "string",
|
75
|
+
"description": "The category you want to get headlines for.",
|
76
|
+
"enum": ["business", "entertainment", "general", "health", "science", "sports", "technology"]
|
77
|
+
},
|
78
|
+
"q": {
|
79
|
+
"type": "string",
|
80
|
+
"description": "Keywords or a phrase to search for."
|
81
|
+
},
|
82
|
+
"page_size": {
|
83
|
+
"type": "integer",
|
84
|
+
"description": "The number of results to return per page (request). 5 is the default, 100 is the maximum."
|
85
|
+
},
|
86
|
+
"page": {
|
87
|
+
"type": "integer",
|
88
|
+
"description": "Use this to page through the results if the total results found is greater than the page size."
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
}
|
93
|
+
},
|
94
|
+
{
|
95
|
+
"type": "function",
|
96
|
+
"function": {
|
97
|
+
"name": "news_retriever__get_sources",
|
98
|
+
"description": "News Retriever: This endpoint returns the subset of news publishers that top headlines (/v2/top-headlines) are available from. It's mainly a convenience endpoint that you can use to keep track of the publishers available on the API, and you can pipe it straight through to your users.",
|
99
|
+
"parameters": {
|
100
|
+
"type": "object",
|
101
|
+
"properties": {
|
102
|
+
"country": {
|
103
|
+
"type": "string",
|
104
|
+
"description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for. Default: all countries.",
|
105
|
+
"enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
|
106
|
+
},
|
107
|
+
"category": {
|
108
|
+
"type": "string",
|
109
|
+
"description": "The category you want to get headlines for. Default: all categories.",
|
110
|
+
"enum": ["business", "entertainment", "general", "health", "science", "sports", "technology"]
|
111
|
+
},
|
112
|
+
"language": {
|
113
|
+
"type": "string",
|
114
|
+
"description": "The 2-letter ISO-639-1 code of the language you want to get headlines for.",
|
115
|
+
"enum": ["ar", "de", "en", "es", "fr", "he", "it", "nl", "no", "pt", "ru", "sv", "ud", "zh"]
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
120
|
+
}
|
121
|
+
]
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::Tool
|
4
|
+
class NewsRetriever < Base
  #
  # Tool that fetches news through the https://newsapi.org/ HTTP API.
  # An API key obtained from https://newsapi.org/ is required.
  #
  # Usage:
  #    news_retriever = Langchain::Tool::NewsRetriever.new(api_key: ENV["NEWS_API_KEY"])
  #
  NAME = "news_retriever"
  ANNOTATIONS_PATH = Langchain.root.join("./langchain/tool/#{NAME}/#{NAME}.json").to_path

  # @param api_key [String] NewsAPI key; read from ENV["NEWS_API_KEY"] when omitted
  def initialize(api_key: ENV["NEWS_API_KEY"])
    @api_key = api_key
  end

  # Search all articles (the "everything" endpoint).
  #
  # @param q [String] Keywords or phrases to search for in the article title and body.
  # @param search_in [String] Fields to restrict the q search to: title, description, content.
  # @param sources [String] Comma-separated source identifiers (maximum 20). Use the /sources endpoint to find them.
  # @param domains [String] Comma-separated domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to.
  # @param exclude_domains [String] Comma-separated domains (eg bbc.co.uk, techcrunch.com, engadget.com) to remove from the results.
  # @param from [String] Date and optional time of the oldest article allowed, in ISO 8601 format.
  # @param to [String] Date and optional time of the newest article allowed, in ISO 8601 format.
  # @param language [String] 2-letter ISO-639-1 language code. Possible options: ar, de, en, es, fr, he, it, nl, no, pt, ru, sv, ud, zh.
  # @param sort_by [String] Sort order. Possible options: relevancy, popularity, publishedAt.
  # @param page_size [Integer] Results per page. The API's default is 20 and its maximum 100; this tool defaults to 5.
  # @param page [Integer] Page number, for paging through the results.
  #
  # @return [String] JSON response
  def get_everything(
    q: nil,
    search_in: nil,
    sources: nil,
    domains: nil,
    exclude_domains: nil,
    from: nil,
    to: nil,
    language: nil,
    sort_by: nil,
    page_size: 5, # The API default is 20 but that's too many.
    page: nil
  )
    Langchain.logger.info("Retrieving all news", for: self.class)

    query = query_params(
      q: q,
      searchIn: search_in,
      sources: sources,
      domains: domains,
      excludeDomains: exclude_domains,
      from: from,
      to: to,
      language: language,
      sortBy: sort_by,
      pageSize: page_size,
      page: page
    )

    send_request(path: "everything", params: query)
  end

  # Fetch top headlines (the "top-headlines" endpoint).
  #
  # @param country [String] 2-letter ISO 3166-1 country code. Possible options: ae, ar, at, au, be, bg, br, ca, ch, cn, co, cu, cz, de, eg, fr, gb, gr, hk, hu, id, ie, il, in, it, jp, kr, lt, lv, ma, mx, my, ng, nl, no, nz, ph, pl, pt, ro, rs, ru, sa, se, sg, si, sk, th, tr, tw, ua, us, ve, za.
  # @param category [String] Headline category. Possible options: business, entertainment, general, health, science, sports, technology.
  # @param sources [String] Comma-separated source identifiers. Use the /sources endpoint to find them.
  # @param q [String] Keywords or a phrase to search for.
  # @param page_size [Integer] Results per page. The API's default is 20 and its maximum 100; this tool defaults to 5.
  # @param page [Integer] Page number, for paging through the results.
  #
  # @return [String] JSON response
  def get_top_headlines(
    country: nil,
    category: nil,
    sources: nil,
    q: nil,
    page_size: 5,
    page: nil
  )
    Langchain.logger.info("Retrieving top news headlines", for: self.class)

    query = query_params(
      country: country,
      category: category,
      sources: sources,
      q: q,
      pageSize: page_size,
      page: page
    )

    send_request(path: "top-headlines", params: query)
  end

  # List news sources (the "top-headlines/sources" endpoint).
  #
  # @param category [String] Headline category. Possible options: business, entertainment, general, health, science, sports, technology.
  # @param language [String] 2-letter ISO-639-1 language code. Possible options: ar, de, en, es, fr, he, it, nl, no, pt, ru, sv, ud, zh.
  # @param country [String] 2-letter ISO 3166-1 country code. Possible options: ae, ar, at, au, be, bg, br, ca, ch, cn, co, cu, cz, de, eg, fr, gb, gr, hk, hu, id, ie, il, in, it, jp, kr, lt, lv, ma, mx, my, ng, nl, no, nz, ph, pl, pt, ro, rs, ru, sa, se, sg, si, sk, th, tr, tw, ua, us, ve, za.
  #
  # @return [String] JSON response
  def get_sources(
    category: nil,
    language: nil,
    country: nil
  )
    Langchain.logger.info("Retrieving news sources", for: self.class)

    query = query_params(country: country, category: category, language: language)

    send_request(path: "top-headlines/sources", params: query)
  end

  private

  # Query hash for a request: the API key first, followed by every given
  # option whose value is truthy, in the order the options were passed.
  def query_params(**options)
    {apiKey: @api_key}.merge(options.select { |_name, value| value })
  end

  # Performs a GET over SSL against https://newsapi.org/v2/<path> with the
  # params form-encoded into the query string, and returns the raw body.
  def send_request(path:, params:)
    endpoint = URI.parse("https://newsapi.org/v2/#{path}?#{URI.encode_www_form(params)}")
    client = Net::HTTP.new(endpoint.host, endpoint.port)
    client.use_ssl = true

    get = Net::HTTP::Get.new(endpoint.request_uri)
    get["Content-Type"] = "application/json"

    client.request(get).body
  end
end
|
132
|
+
end
|
@@ -9,8 +9,8 @@ module Langchain::Tool
|
|
9
9
|
# gem "wikipedia-client", "~> 1.17.0"
|
10
10
|
#
|
11
11
|
# Usage:
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# wikipedia = Langchain::Tool::Wikipedia.new
|
13
|
+
# wikipedia.execute(input: "The Roman Empire")
|
14
14
|
#
|
15
15
|
NAME = "wikipedia"
|
16
16
|
ANNOTATIONS_PATH = Langchain.root.join("./langchain/tool/#{NAME}/#{NAME}.json").to_path
|