langchainrb 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -2
- data/lib/langchain/assistants/assistant.rb +75 -20
- data/lib/langchain/assistants/messages/base.rb +16 -0
- data/lib/langchain/assistants/messages/google_gemini_message.rb +90 -0
- data/lib/langchain/assistants/messages/openai_message.rb +74 -0
- data/lib/langchain/assistants/thread.rb +5 -5
- data/lib/langchain/llm/anthropic.rb +27 -49
- data/lib/langchain/llm/aws_bedrock.rb +30 -34
- data/lib/langchain/llm/azure.rb +6 -0
- data/lib/langchain/llm/base.rb +20 -1
- data/lib/langchain/llm/cohere.rb +38 -6
- data/lib/langchain/llm/google_gemini.rb +67 -0
- data/lib/langchain/llm/google_vertex_ai.rb +68 -112
- data/lib/langchain/llm/mistral_ai.rb +10 -19
- data/lib/langchain/llm/ollama.rb +23 -27
- data/lib/langchain/llm/openai.rb +20 -48
- data/lib/langchain/llm/parameters/chat.rb +51 -0
- data/lib/langchain/llm/response/base_response.rb +2 -2
- data/lib/langchain/llm/response/cohere_response.rb +16 -0
- data/lib/langchain/llm/response/google_gemini_response.rb +45 -0
- data/lib/langchain/llm/response/openai_response.rb +5 -1
- data/lib/langchain/llm/unified_parameters.rb +98 -0
- data/lib/langchain/loader.rb +6 -0
- data/lib/langchain/tool/base.rb +16 -6
- data/lib/langchain/tool/calculator/calculator.json +1 -1
- data/lib/langchain/tool/database/database.json +3 -3
- data/lib/langchain/tool/file_system/file_system.json +3 -3
- data/lib/langchain/tool/news_retriever/news_retriever.json +121 -0
- data/lib/langchain/tool/news_retriever/news_retriever.rb +132 -0
- data/lib/langchain/tool/ruby_code_interpreter/ruby_code_interpreter.json +1 -1
- data/lib/langchain/tool/vectorsearch/vectorsearch.json +1 -1
- data/lib/langchain/tool/weather/weather.json +1 -1
- data/lib/langchain/tool/wikipedia/wikipedia.json +1 -1
- data/lib/langchain/tool/wikipedia/wikipedia.rb +2 -2
- data/lib/langchain/utils/token_length/openai_validator.rb +6 -1
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +3 -0
- metadata +22 -15
- data/lib/langchain/assistants/message.rb +0 -58
- data/lib/langchain/llm/response/google_vertex_ai_response.rb +0 -33
@@ -45,14 +45,14 @@ module Langchain
|
|
45
45
|
|
46
46
|
# Return the completion candidates
|
47
47
|
#
|
48
|
-
# @return [Array]
|
48
|
+
# @return [Array<String>]
|
49
49
|
def completions
|
50
50
|
raise NotImplementedError
|
51
51
|
end
|
52
52
|
|
53
53
|
# Return the chat completion candidates
|
54
54
|
#
|
55
|
-
# @return [Array]
|
55
|
+
# @return [Array<String>]
|
56
56
|
def chat_completions
|
57
57
|
raise NotImplementedError
|
58
58
|
end
|
@@ -17,5 +17,21 @@ module Langchain::LLM
|
|
17
17
|
def completion
|
18
18
|
completions&.dig(0, "text")
|
19
19
|
end
|
20
|
+
|
21
|
+
def chat_completion
|
22
|
+
raw_response.dig("text")
|
23
|
+
end
|
24
|
+
|
25
|
+
# Role of the most recent entry in the response's "chat_history" list.
#
# @return [String, nil] the "role" value of the last chat_history entry, or nil
#   when "chat_history" is missing or empty (previously raised NoMethodError)
def role
  raw_response.dig("chat_history")&.last&.dig("role")
end
|
28
|
+
|
29
|
+
def prompt_tokens
|
30
|
+
raw_response.dig("meta", "billed_units", "input_tokens")
|
31
|
+
end
|
32
|
+
|
33
|
+
def completion_tokens
|
34
|
+
raw_response.dig("meta", "billed_units", "output_tokens")
|
35
|
+
end
|
20
36
|
end
|
21
37
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::LLM
|
4
|
+
# Accessors over a raw Google Gemini response hash.
#
# All readers use Hash#dig on string keys, so a missing key yields nil
# rather than raising.
class GoogleGeminiResponse < BaseResponse
  # @param raw_response [Hash] parsed response body (string keys are read below)
  # @param model [String, nil] forwarded unchanged to BaseResponse
  def initialize(raw_response, model: nil)
    super(raw_response, model: model)
  end

  # @return [String, nil] text of the first part of the first candidate
  def chat_completion
    raw_response.dig("candidates", 0, "content", "parts", 0, "text")
  end

  # @return [String, nil] role recorded on the first candidate's content
  def role
    raw_response.dig("candidates", 0, "content", "role")
  end

  # Parts of the first candidate when its first part is a function call.
  #
  # @return [Array<Hash>] all parts of the first candidate if the first part has
  #   a "functionCall" key; otherwise an empty array. Missing or empty
  #   "content"/"parts" also yield an empty array (previously raised
  #   NoMethodError on nil).
  def tool_calls
    parts = raw_response.dig("candidates", 0, "content", "parts")
    parts&.first&.key?("functionCall") ? parts : []
  end

  # @return [Array, nil] first element of {#embeddings}
  def embedding
    embeddings.first
  end

  # NOTE(review): this reads the "predictions" key while every other accessor
  # reads "candidates"/"usageMetadata" — confirm against the embeddings endpoint.
  #
  # @return [Array] single-element array wrapping the embedding values
  #   (the element is nil when the key path is absent)
  def embeddings
    [raw_response.dig("predictions", 0, "embeddings", "values")]
  end

  # @return [Integer, nil] "usageMetadata.promptTokenCount"
  def prompt_tokens
    raw_response.dig("usageMetadata", "promptTokenCount")
  end

  # @return [Integer, nil] "usageMetadata.candidatesTokenCount"
  def completion_tokens
    raw_response.dig("usageMetadata", "candidatesTokenCount")
  end

  # @return [Integer, nil] "usageMetadata.totalTokenCount"
  def total_tokens
    raw_response.dig("usageMetadata", "totalTokenCount")
  end
end
|
45
|
+
end
|
@@ -25,7 +25,11 @@ module Langchain::LLM
|
|
25
25
|
end
|
26
26
|
|
27
27
|
# Tool calls requested by the first chat completion choice.
#
# Safe navigation plus a single dig means a missing choices list, a missing
# message, or a null "tool_calls" value all produce an empty array instead of
# raising NoMethodError or returning nil.
#
# @return [Array<Hash>] the "tool_calls" entries of the first message, or []
def tool_calls
  chat_completions&.dig(0, "message", "tool_calls") || []
end
|
30
34
|
|
31
35
|
def embedding
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::LLM
|
4
|
+
# Normalizes a parameter hash against a schema.
#
# The schema maps each canonical parameter name to an options hash that may
# carry a +:default+ value and +:aliases+. {#to_params} keeps only schema
# keys, fills canonical keys from aliases and defaults, applies the renames
# registered with {#remap} and drops the fields registered with {#ignore}.
#
# The schema hash handed to {#initialize} is stored by reference and is
# mutated by {#remap} and {#update}; pass a copy when it is shared.
class UnifiedParameters
  include Enumerable

  attr_reader :schema, :aliases, :parameters, :ignored, :remapped

  # Variant constructed with an empty schema.
  class Null < self
    # @param parameters [#to_h] parameters to normalize
    def initialize(parameters: {})
      super(schema: {}, parameters: parameters)
    end
  end

  # @param schema [Hash, nil] canonical name => options (+:default+, +:aliases+);
  #   nil is treated as an empty schema, nil options as no options
  # @param parameters [#to_h] initial parameters; normalized immediately when non-empty
  def initialize(schema:, parameters: {})
    @schema = schema || {}
    @aliases = {}
    @remapped = {}
    @ignored = Set.new
    @schema.each do |name, param|
      declared = (param || {})[:aliases]
      @aliases[name] = Set.new(Array(declared)) if declared
    end
    @parameters = to_params(parameters.to_h) if !parameters.to_h.empty?
  end

  # Normalizes +params+ (or re-normalizes the stored parameters when empty).
  #
  # @param params [Hash] raw parameters; when non-empty they replace any
  #   previously stored parameters
  # @return [Hash] the normalized parameters, also stored in {#parameters}
  def to_params(params = {})
    # if params are provided, reset any previously initialized
    @parameters = params unless params.empty?
    # slice returns a new hash, so the caller's hash is never mutated
    # (and a frozen hash is accepted)
    @parameters = (@parameters || {}).slice(*schema.keys)
    @aliases.each do |field, aliased_keys|
      # favor existing keys in case of conflicts,
      # and check for multiples
      aliased_keys.each do |alias_key|
        alias_value = params[alias_key]
        # nil check (not ||=) so an explicit `false` on the canonical key wins
        @parameters[field] = alias_value if @parameters[field].nil? && value_present?(alias_value)
      end
    end
    @schema.each do |field, param_options|
      default = (param_options || {})[:default]
      # nil check (not ||=) so an explicit `false` is not replaced by the default
      @parameters[field] = default if @parameters[field].nil? && value_present?(default)
    end
    @remapped.each do |field, renamed_field|
      @parameters[renamed_field] = @parameters[field] if value_present?(@parameters[field])
    end
    @parameters = @parameters.except(*@ignored, *@remapped.keys)
  end

  # Registers renames applied at the end of {#to_params}; the original field
  # is removed from the output. The renamed field inherits the original
  # field's schema options.
  #
  # @param field_map [Hash] original field => renamed field
  # @return [Hash] +field_map+
  def remap(field_map)
    @remapped ||= {}
    @remapped.merge!(field_map)
    field_map.each do |field, renamed_field|
      @schema[renamed_field] = @schema[field]
    end
  end

  # Merges additional schema entries and registers their aliases.
  #
  # @param schema [Hash] canonical name => options (+:default+, +:aliases+)
  # @return [self]
  def update(schema = {})
    @schema.merge!(schema)
    schema.each do |name, param|
      declared = (param || {})[:aliases]
      next unless declared

      # merge each alias individually; `<<` would store an Array of aliases
      # as one Set element that no parameter key could ever match
      (@aliases[name] ||= Set.new).merge(Array(declared))
    end
    self
  end

  # Marks fields to be dropped from the output of {#to_params}.
  #
  # @param field_names [Array] field names to drop
  # @return [Set] the accumulated ignored field names
  def ignore(*field_names)
    @ignored.merge(field_names)
  end

  # Registers one alias for a canonical field.
  #
  # @param field_name [Object] canonical field name
  # @param as [Object] alias key looked up in the raw parameters
  # @return [Set] the aliases registered for +field_name+
  def alias_field(field_name, as:)
    @aliases[field_name] ||= Set.new
    @aliases[field_name] << as
  end

  # @return [Hash] the stored parameters ({} when none were ever normalized)
  def to_h
    @parameters.to_h
  end

  # Iterates over the re-normalized parameters (backs Enumerable).
  def each(&block)
    to_params.each(&block)
  end

  # Delegates to Hash#<=> on the normalized parameters of both operands.
  def <=>(other)
    to_params.<=>(other.to_params)
  end

  # @param key [Object] parameter name
  # @return [Object, nil] value of +key+ in the re-normalized parameters
  def [](key)
    to_params[key]
  end

  private

  # A value counts as present when it is not nil and, if Enumerable, not empty.
  def value_present?(value)
    !value.nil? && (!value.is_a?(Enumerable) || !value.empty?)
  end
end
|
98
|
+
end
|
data/lib/langchain/loader.rb
CHANGED
@@ -29,9 +29,11 @@ module Langchain
|
|
29
29
|
# @param path [String | Pathname] path to file or URL
|
30
30
|
# @param options [Hash] options passed to the processor class used to process the data
|
31
31
|
# @return [Data] data loaded from path
|
32
|
+
# rubocop:disable Style/ArgumentsForwarding
|
32
33
|
def self.load(path, options = {}, &block)
|
33
34
|
new(path, options).load(&block)
|
34
35
|
end
|
36
|
+
# rubocop:enable Style/ArgumentsForwarding
|
35
37
|
|
36
38
|
# Initialize Langchain::Loader
|
37
39
|
# @param path [String | Pathname] path to file or URL
|
@@ -76,12 +78,14 @@ module Langchain
|
|
76
78
|
# @yieldreturn [String] parsed data, as a String
|
77
79
|
#
|
78
80
|
# @return [Data] data that was loaded
|
81
|
+
# rubocop:disable Style/ArgumentsForwarding
|
79
82
|
def load(&block)
|
80
83
|
return process_data(load_from_url, &block) if url?
|
81
84
|
return load_from_directory(&block) if directory?
|
82
85
|
|
83
86
|
process_data(load_from_path, &block)
|
84
87
|
end
|
88
|
+
# rubocop:enable Style/ArgumentsForwarding
|
85
89
|
|
86
90
|
private
|
87
91
|
|
@@ -95,6 +99,7 @@ module Langchain
|
|
95
99
|
raise FileNotFound, "File #{@path} does not exist"
|
96
100
|
end
|
97
101
|
|
102
|
+
# rubocop:disable Style/ArgumentsForwarding
|
98
103
|
def load_from_directory(&block)
|
99
104
|
Dir.glob(File.join(@path, "**/*")).map do |file|
|
100
105
|
# Only load and add to result files with supported extensions
|
@@ -103,6 +108,7 @@ module Langchain
|
|
103
108
|
UnknownFormatError nil
|
104
109
|
end.flatten.compact
|
105
110
|
end
|
111
|
+
# rubocop:enable Style/ArgumentsForwarding
|
106
112
|
|
107
113
|
def process_data(data, &block)
|
108
114
|
@raw_data = data
|
data/lib/langchain/tool/base.rb
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
module Langchain::Tool
|
4
4
|
# = Tools
|
5
5
|
#
|
6
|
-
# Tools are used by Agents to perform specific tasks.
|
6
|
+
# Tools are used by Agents to perform specific tasks. A 'Tool' is a collection of functions ("methods").
|
7
7
|
#
|
8
8
|
# == Available Tools
|
9
9
|
#
|
10
10
|
# - {Langchain::Tool::Calculator}: calculate the result of a math expression
|
11
11
|
# - {Langchain::Tool::Database}: executes SQL queries
|
12
|
-
# - {Langchain::Tool::FileSystem}: interacts with
|
12
|
+
# - {Langchain::Tool::FileSystem}: interacts with the file system
|
13
13
|
# - {Langchain::Tool::GoogleSearch}: search on Google (via SerpAPI)
|
14
14
|
# - {Langchain::Tool::RubyCodeInterpreter}: runs ruby code
|
15
15
|
# - {Langchain::Tool::Weather}: gets current weather data
|
@@ -42,10 +42,10 @@ module Langchain::Tool
|
|
42
42
|
#
|
43
43
|
# == Adding Tools
|
44
44
|
#
|
45
|
-
# 1. Create a new
|
46
|
-
# 2.
|
45
|
+
# 1. Create a new folder in lib/langchain/tool/your_tool_name/
|
46
|
+
# 2. Inside of this folder create a file with a class YourToolName that inherits from {Langchain::Tool::Base}
|
47
47
|
# 3. Add `NAME=` and `ANNOTATIONS_PATH=` constants in your Tool class
|
48
|
-
# 4. Implement various methods in your tool class
|
48
|
+
# 4. Implement various public methods in your tool class
|
49
49
|
# 5. Create a sidecar .json file in the same directory as your tool file annotating the methods in the Open API format
|
50
50
|
# 6. Add your tool to the {file:README.md}
|
51
51
|
class Base
|
@@ -66,11 +66,21 @@ module Langchain::Tool
|
|
66
66
|
|
67
67
|
# Returns the tool as a list of OpenAI formatted functions
|
68
68
|
#
|
69
|
-
# @return [Hash] tool as
|
69
|
+
# @return [Array<Hash>] List of hashes representing the tool as OpenAI formatted functions
|
70
70
|
def to_openai_tools
|
71
71
|
method_annotations
|
72
72
|
end
|
73
73
|
|
74
|
+
# Returns the tool as a list of Google Gemini formatted functions
|
75
|
+
#
|
76
|
+
# @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions
|
77
|
+
def to_google_gemini_tools
|
78
|
+
method_annotations.map do |annotation|
|
79
|
+
# Slice out only the content of the "function" key
|
80
|
+
annotation["function"]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
74
84
|
# Return tool's method annotations as JSON
|
75
85
|
#
|
76
86
|
# @return [Hash] Tool's method annotations
|
@@ -2,7 +2,7 @@
|
|
2
2
|
{
|
3
3
|
"type": "function",
|
4
4
|
"function": {
|
5
|
-
"name": "
|
5
|
+
"name": "calculator__execute",
|
6
6
|
"description": "Evaluates a pure math expression or if equation contains non-math characters (e.g.: \"12F in Celsius\") then it uses the google search calculator to evaluate the expression",
|
7
7
|
"parameters": {
|
8
8
|
"type": "object",
|
@@ -2,7 +2,7 @@
|
|
2
2
|
{
|
3
3
|
"type": "function",
|
4
4
|
"function": {
|
5
|
-
"name": "
|
5
|
+
"name": "database__describe_tables",
|
6
6
|
"description": "Database Tool: Returns the schema for a list of tables",
|
7
7
|
"parameters": {
|
8
8
|
"type": "object",
|
@@ -18,7 +18,7 @@
|
|
18
18
|
}, {
|
19
19
|
"type": "function",
|
20
20
|
"function": {
|
21
|
-
"name": "
|
21
|
+
"name": "database__list_tables",
|
22
22
|
"description": "Database Tool: Returns a list of tables in the database",
|
23
23
|
"parameters": {
|
24
24
|
"type": "object",
|
@@ -29,7 +29,7 @@
|
|
29
29
|
}, {
|
30
30
|
"type": "function",
|
31
31
|
"function": {
|
32
|
-
"name": "
|
32
|
+
"name": "database__execute",
|
33
33
|
"description": "Database Tool: Executes a SQL query and returns the results",
|
34
34
|
"parameters": {
|
35
35
|
"type": "object",
|
@@ -2,7 +2,7 @@
|
|
2
2
|
{
|
3
3
|
"type": "function",
|
4
4
|
"function": {
|
5
|
-
"name": "
|
5
|
+
"name": "file_system__list_directory",
|
6
6
|
"description": "File System Tool: Lists out the content of a specified directory",
|
7
7
|
"parameters": {
|
8
8
|
"type": "object",
|
@@ -19,7 +19,7 @@
|
|
19
19
|
{
|
20
20
|
"type": "function",
|
21
21
|
"function": {
|
22
|
-
"name": "
|
22
|
+
"name": "file_system__read_file",
|
23
23
|
"description": "File System Tool: Reads the contents of a file",
|
24
24
|
"parameters": {
|
25
25
|
"type": "object",
|
@@ -36,7 +36,7 @@
|
|
36
36
|
{
|
37
37
|
"type": "function",
|
38
38
|
"function": {
|
39
|
-
"name": "
|
39
|
+
"name": "file_system__write_to_file",
|
40
40
|
"description": "File System Tool: Write content to a file",
|
41
41
|
"parameters": {
|
42
42
|
"type": "object",
|
@@ -0,0 +1,121 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"type": "function",
|
4
|
+
"function": {
|
5
|
+
"name": "news_retriever__get_everything",
|
6
|
+
"description": "News Retriever: Search through millions of articles from over 150,000 large and small news sources and blogs.",
|
7
|
+
"parameters": {
|
8
|
+
"type": "object",
|
9
|
+
"properties": {
|
10
|
+
"q": {
|
11
|
+
"type": "string",
|
12
|
+
"description": "Keywords or phrases to search for in the article title and body. Surround phrases with quotes (\") for exact match. Alternatively you can use the AND / OR / NOT keywords, and optionally group these with parenthesis. Must be URL-encoded."
|
13
|
+
},
|
14
|
+
"search_in": {
|
15
|
+
"type": "string",
|
16
|
+
"description": "The fields to restrict your q search to.",
|
17
|
+
"enum": ["title", "description", "content"]
|
18
|
+
},
|
19
|
+
"sources": {
|
20
|
+
"type": "string",
|
21
|
+
"description": "A comma-seperated string of identifiers (maximum 20) for the news sources or blogs you want headlines from. Use the /sources endpoint to locate these programmatically or look at the sources index."
|
22
|
+
},
|
23
|
+
"domains": {
|
24
|
+
"type": "string",
|
25
|
+
"description": "A comma-seperated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to."
|
26
|
+
},
|
27
|
+
"exclude_domains": {
|
28
|
+
"type": "string",
|
29
|
+
"description": "A comma-seperated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to remove from the results."
|
30
|
+
},
|
31
|
+
"from": {
|
32
|
+
"type": "string",
|
33
|
+
"description": "A date and optional time for the oldest article allowed. This should be in ISO 8601 format."
|
34
|
+
},
|
35
|
+
"to": {
|
36
|
+
"type": "string",
|
37
|
+
"description": "A date and optional time for the newest article allowed. This should be in ISO 8601 format."
|
38
|
+
},
|
39
|
+
"language": {
|
40
|
+
"type": "string",
|
41
|
+
"description": "The 2-letter ISO-639-1 code of the language you want to get headlines for.",
|
42
|
+
"enum": ["ar", "de", "en", "es", "fr", "he", "it", "nl", "no", "pt", "ru", "sv", "ud", "zh"]
|
43
|
+
},
|
44
|
+
"sort_by": {
|
45
|
+
"type": "string",
|
46
|
+
"description": "The order to sort the articles in.",
|
47
|
+
"enum": ["relevancy", "popularity", "publishedAt"]
|
48
|
+
},
|
49
|
+
"page_size": {
|
50
|
+
"type": "integer",
|
51
|
+
"description": "The number of results to return per page (request). 5 is the default, 100 is the maximum."
|
52
|
+
},
|
53
|
+
"page": {
|
54
|
+
"type": "integer",
|
55
|
+
"description": "Use this to page through the results if the total results found is greater than the page size."
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
}
|
60
|
+
},
|
61
|
+
{
|
62
|
+
"type": "function",
|
63
|
+
"function": {
|
64
|
+
"name": "news_retriever__get_top_headlines",
|
65
|
+
"description": "News Retriever: Provides live top and breaking headlines for a country, specific category in a country, single source, or multiple sources. You can also search with keywords. Articles are sorted by the earliest date published first.",
|
66
|
+
"parameters": {
|
67
|
+
"type": "object",
|
68
|
+
"properties": {
|
69
|
+
"country": {
|
70
|
+
"type": "string",
|
71
|
+
"description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
|
72
|
+
},
|
73
|
+
"category": {
|
74
|
+
"type": "string",
|
75
|
+
"description": "The category you want to get headlines for.",
|
76
|
+
"enum": ["business", "entertainment", "general", "health", "science", "sports", "technology"]
|
77
|
+
},
|
78
|
+
"q": {
|
79
|
+
"type": "string",
|
80
|
+
"description": "Keywords or a phrase to search for."
|
81
|
+
},
|
82
|
+
"page_size": {
|
83
|
+
"type": "integer",
|
84
|
+
"description": "The number of results to return per page (request). 5 is the default, 100 is the maximum."
|
85
|
+
},
|
86
|
+
"page": {
|
87
|
+
"type": "integer",
|
88
|
+
"description": "Use this to page through the results if the total results found is greater than the page size."
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
}
|
93
|
+
},
|
94
|
+
{
|
95
|
+
"type": "function",
|
96
|
+
"function": {
|
97
|
+
"name": "news_retriever__get_sources",
|
98
|
+
"description": "News Retriever: This endpoint returns the subset of news publishers that top headlines (/v2/top-headlines) are available from. It's mainly a convenience endpoint that you can use to keep track of the publishers available on the API, and you can pipe it straight through to your users.",
|
99
|
+
"parameters": {
|
100
|
+
"type": "object",
|
101
|
+
"properties": {
|
102
|
+
"country": {
|
103
|
+
"type": "string",
|
104
|
+
"description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for. Default: all countries.",
|
105
|
+
"enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
|
106
|
+
},
|
107
|
+
"category": {
|
108
|
+
"type": "string",
|
109
|
+
"description": "The category you want to get headlines for. Default: all categories.",
|
110
|
+
"enum": ["business", "entertainment", "general", "health", "science", "sports", "technology"]
|
111
|
+
},
|
112
|
+
"language": {
|
113
|
+
"type": "string",
|
114
|
+
"description": "The 2-letter ISO-639-1 code of the language you want to get headlines for.",
|
115
|
+
"enum": ["ar", "de", "en", "es", "fr", "he", "it", "nl", "no", "pt", "ru", "sv", "ud", "zh"]
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
120
|
+
}
|
121
|
+
]
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::Tool
|
4
|
+
class NewsRetriever < Base
  #
  # A tool that retrieves latest news from various sources via https://newsapi.org/.
  # An API key needs to be obtained from https://newsapi.org/ to use this tool.
  #
  # Usage:
  #    news_retriever = Langchain::Tool::NewsRetriever.new(api_key: ENV["NEWS_API_KEY"])
  #
  NAME = "news_retriever"
  ANNOTATIONS_PATH = Langchain.root.join("./langchain/tool/#{NAME}/#{NAME}.json").to_path

  # @param api_key [String] NewsAPI key; defaults to the NEWS_API_KEY environment variable
  def initialize(api_key: ENV["NEWS_API_KEY"])
    @api_key = api_key
  end

  # Retrieve all news
  #
  # @param q [String] Keywords or phrases to search for in the article title and body.
  # @param search_in [String] The fields to restrict your q search to. The possible options are: title, description, content.
  # @param sources [String] A comma-separated string of identifiers (maximum 20) for the news sources or blogs you want headlines from. Use the /sources endpoint to locate these programmatically or look at the sources index.
  # @param domains [String] A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to.
  # @param exclude_domains [String] A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to remove from the results.
  # @param from [String] A date and optional time for the oldest article allowed. This should be in ISO 8601 format.
  # @param to [String] A date and optional time for the newest article allowed. This should be in ISO 8601 format.
  # @param language [String] The 2-letter ISO-639-1 code of the language you want to get headlines for. Possible options: ar, de, en, es, fr, he, it, nl, no, pt, ru, sv, ud, zh.
  # @param sort_by [String] The order to sort the articles in. Possible options: relevancy, popularity, publishedAt.
  # @param page_size [Integer] The number of results to return per page. 20 is the API's default, 100 is the maximum. Our default is 5.
  # @param page [Integer] Use this to page through the results.
  #
  # @return [String] JSON response
  def get_everything(
    q: nil,
    search_in: nil,
    sources: nil,
    domains: nil,
    exclude_domains: nil,
    from: nil,
    to: nil,
    language: nil,
    sort_by: nil,
    page_size: 5, # The API default is 20 but that's too many.
    page: nil
  )
    Langchain.logger.info("Retrieving all news", for: self.class)

    params = build_params(
      q: q,
      searchIn: search_in,
      sources: sources,
      domains: domains,
      excludeDomains: exclude_domains,
      from: from,
      to: to,
      language: language,
      sortBy: sort_by,
      pageSize: page_size,
      page: page
    )

    send_request(path: "everything", params: params)
  end

  # Retrieve top headlines
  #
  # @param country [String] The 2-letter ISO 3166-1 code of the country you want to get headlines for. Possible options: ae, ar, at, au, be, bg, br, ca, ch, cn, co, cu, cz, de, eg, fr, gb, gr, hk, hu, id, ie, il, in, it, jp, kr, lt, lv, ma, mx, my, ng, nl, no, nz, ph, pl, pt, ro, rs, ru, sa, se, sg, si, sk, th, tr, tw, ua, us, ve, za.
  # @param category [String] The category you want to get headlines for. Possible options: business, entertainment, general, health, science, sports, technology.
  # @param sources [String] A comma-separated string of identifiers for the news sources or blogs you want headlines from. Use the /sources endpoint to locate these programmatically.
  # @param q [String] Keywords or a phrase to search for.
  # @param page_size [Integer] The number of results to return per page. 20 is the API's default, 100 is the maximum. Our default is 5.
  # @param page [Integer] Use this to page through the results.
  #
  # @return [String] JSON response
  def get_top_headlines(
    country: nil,
    category: nil,
    sources: nil,
    q: nil,
    page_size: 5,
    page: nil
  )
    Langchain.logger.info("Retrieving top news headlines", for: self.class)

    params = build_params(
      country: country,
      category: category,
      sources: sources,
      q: q,
      pageSize: page_size,
      page: page
    )

    send_request(path: "top-headlines", params: params)
  end

  # Retrieve news sources
  #
  # @param category [String] The category you want to get headlines for. Possible options: business, entertainment, general, health, science, sports, technology.
  # @param language [String] The 2-letter ISO-639-1 code of the language you want to get headlines for. Possible options: ar, de, en, es, fr, he, it, nl, no, pt, ru, sv, ud, zh.
  # @param country [String] The 2-letter ISO 3166-1 code of the country you want to get headlines for. Possible options: ae, ar, at, au, be, bg, br, ca, ch, cn, co, cu, cz, de, eg, fr, gb, gr, hk, hu, id, ie, il, in, it, jp, kr, lt, lv, ma, mx, my, ng, nl, no, nz, ph, pl, pt, ro, rs, ru, sa, se, sg, si, sk, th, tr, tw, ua, us, ve, za.
  #
  # @return [String] JSON response
  def get_sources(
    category: nil,
    language: nil,
    country: nil
  )
    Langchain.logger.info("Retrieving news sources", for: self.class)

    params = build_params(
      country: country,
      category: category,
      language: language
    )

    send_request(path: "top-headlines/sources", params: params)
  end

  private

  # Builds the query parameters for a request: the API key first, followed by
  # every optional parameter whose value is truthy, in the order given.
  #
  # @param optional [Hash] API query parameter name => value (nil/false entries are omitted)
  # @return [Hash] query parameters including :apiKey
  def build_params(optional)
    {apiKey: @api_key}.merge(optional.select { |_name, value| value })
  end

  # Performs a GET request against https://newsapi.org/v2/ over TLS.
  #
  # @param path [String] endpoint path below /v2/
  # @param params [Hash] query parameters, form-encoded into the URL
  # @return [String] raw response body (the HTTP status is not inspected)
  def send_request(path:, params:)
    uri = URI.parse("https://newsapi.org/v2/#{path}?#{URI.encode_www_form(params)}")
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true

    request = Net::HTTP::Get.new(uri.request_uri)
    request["Content-Type"] = "application/json"

    response = http.request(request)
    response.body
  end
end
|
132
|
+
end
|
@@ -9,8 +9,8 @@ module Langchain::Tool
|
|
9
9
|
# gem "wikipedia-client", "~> 1.17.0"
|
10
10
|
#
|
11
11
|
# Usage:
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# wikipedia = Langchain::Tool::Wikipedia.new
|
13
|
+
# wikipedia.execute(input: "The Roman Empire")
|
14
14
|
#
|
15
15
|
NAME = "wikipedia"
|
16
16
|
ANNOTATIONS_PATH = Langchain.root.join("./langchain/tool/#{NAME}/#{NAME}.json").to_path
|