pygpt-net 2.4.42__py3-none-any.whl → 2.4.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +11 -0
- README.md +17 -2
- pygpt_net/CHANGELOG.txt +11 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/controller/attachment.py +31 -3
- pygpt_net/controller/chat/attachment.py +37 -36
- pygpt_net/controller/config/placeholder.py +6 -4
- pygpt_net/controller/idx/common.py +7 -3
- pygpt_net/core/attachments/__init__.py +7 -2
- pygpt_net/core/attachments/context.py +52 -34
- pygpt_net/core/db/__init__.py +2 -1
- pygpt_net/core/debug/attachments.py +1 -0
- pygpt_net/core/idx/__init__.py +8 -3
- pygpt_net/core/idx/indexing.py +24 -7
- pygpt_net/core/idx/ui/__init__.py +22 -0
- pygpt_net/core/idx/ui/loaders.py +217 -0
- pygpt_net/data/config/config.json +4 -4
- pygpt_net/data/config/models.json +3 -3
- pygpt_net/data/config/modes.json +3 -3
- pygpt_net/data/config/settings.json +5 -5
- pygpt_net/data/locale/locale.de.ini +3 -3
- pygpt_net/data/locale/locale.en.ini +11 -9
- pygpt_net/data/locale/locale.es.ini +3 -3
- pygpt_net/data/locale/locale.fr.ini +3 -3
- pygpt_net/data/locale/locale.it.ini +3 -3
- pygpt_net/data/locale/locale.pl.ini +3 -3
- pygpt_net/data/locale/locale.uk.ini +3 -3
- pygpt_net/data/locale/locale.zh.ini +3 -3
- pygpt_net/data/locale/plugin.mailer.en.ini +5 -5
- pygpt_net/item/attachment.py +5 -1
- pygpt_net/item/ctx.py +99 -2
- pygpt_net/migrations/Version20241215110000.py +25 -0
- pygpt_net/migrations/__init__.py +3 -1
- pygpt_net/plugin/cmd_files/__init__.py +3 -2
- pygpt_net/provider/core/attachment/json_file.py +4 -1
- pygpt_net/provider/core/config/patch.py +6 -0
- pygpt_net/provider/core/ctx/db_sqlite/storage.py +50 -7
- pygpt_net/provider/core/ctx/db_sqlite/utils.py +29 -5
- pygpt_net/provider/loaders/base.py +14 -0
- pygpt_net/provider/loaders/hub/yt/base.py +5 -0
- pygpt_net/provider/loaders/web_database.py +13 -5
- pygpt_net/provider/loaders/web_github_issues.py +5 -1
- pygpt_net/provider/loaders/web_google_calendar.py +9 -1
- pygpt_net/provider/loaders/web_google_docs.py +6 -1
- pygpt_net/provider/loaders/web_google_drive.py +10 -1
- pygpt_net/provider/loaders/web_google_gmail.py +2 -1
- pygpt_net/provider/loaders/web_google_keep.py +5 -1
- pygpt_net/provider/loaders/web_google_sheets.py +5 -1
- pygpt_net/provider/loaders/web_microsoft_onedrive.py +15 -1
- pygpt_net/provider/loaders/web_page.py +4 -2
- pygpt_net/provider/loaders/web_rss.py +2 -1
- pygpt_net/provider/loaders/web_sitemap.py +2 -1
- pygpt_net/provider/loaders/web_twitter.py +4 -2
- pygpt_net/provider/loaders/web_yt.py +17 -2
- pygpt_net/provider/vector_stores/ctx_attachment.py +1 -1
- pygpt_net/tools/indexer/__init__.py +8 -40
- pygpt_net/tools/indexer/ui/web.py +20 -78
- pygpt_net/ui/layout/ctx/ctx_list.py +86 -18
- pygpt_net/ui/widget/dialog/url.py +151 -14
- pygpt_net/ui/widget/element/group.py +15 -2
- pygpt_net/ui/widget/lists/context.py +23 -9
- pygpt_net/utils.py +1 -1
- {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/METADATA +18 -3
- {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/RECORD +67 -64
- {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/LICENSE +0 -0
- {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/WHEEL +0 -0
- {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/entry_points.txt +0 -0
@@ -68,6 +68,9 @@ class Storage:
|
|
68
68
|
# only base by default
|
69
69
|
where_clauses.append("(m.root_id IS NULL OR m.root_id = 0)")
|
70
70
|
|
71
|
+
# join group
|
72
|
+
join_clauses.append("LEFT JOIN ctx_group g ON m.group_id = g.id")
|
73
|
+
|
71
74
|
# search_string
|
72
75
|
if search_string:
|
73
76
|
date_ranges = search_by_date_string(search_string)
|
@@ -106,15 +109,16 @@ class Storage:
|
|
106
109
|
continue
|
107
110
|
mode = filter.get('mode', '=')
|
108
111
|
value = filter.get('value', '')
|
112
|
+
key_name = 'm.' + key
|
109
113
|
if isinstance(value, int):
|
110
|
-
where_clauses.append(f"{
|
114
|
+
where_clauses.append(f"{key_name} {mode} :{key}")
|
111
115
|
bind_params[key] = value
|
112
116
|
elif isinstance(value, str):
|
113
|
-
where_clauses.append(f"{
|
117
|
+
where_clauses.append(f"{key_name} {mode} :{key}")
|
114
118
|
bind_params[key] = f"%{value}%"
|
115
119
|
elif isinstance(value, list):
|
116
120
|
values = "(" + ",".join([str(x) for x in value]) + ")"
|
117
|
-
where_clauses.append(f"{
|
121
|
+
where_clauses.append(f"{key_name} {mode} {values}")
|
118
122
|
|
119
123
|
where_statement = " AND ".join(where_clauses) if where_clauses else "1"
|
120
124
|
join_statement = " ".join(join_clauses) if join_clauses else ""
|
@@ -154,8 +158,18 @@ class Storage:
|
|
154
158
|
append_date_ranges=True,
|
155
159
|
)
|
156
160
|
stmt_text = f"""
|
157
|
-
SELECT
|
158
|
-
|
161
|
+
SELECT
|
162
|
+
m.*,
|
163
|
+
g.name as group_name,
|
164
|
+
g.uuid as group_uuid,
|
165
|
+
g.additional_ctx_json as group_additional_ctx_json
|
166
|
+
FROM
|
167
|
+
ctx_meta m
|
168
|
+
{join_statement}
|
169
|
+
WHERE
|
170
|
+
{where_statement}
|
171
|
+
ORDER BY
|
172
|
+
m.updated_ts DESC {limit_suffix}
|
159
173
|
"""
|
160
174
|
stmt = text(stmt_text).bindparams(**bind_params)
|
161
175
|
|
@@ -177,7 +191,17 @@ class Storage:
|
|
177
191
|
:return: dict of CtxMeta
|
178
192
|
"""
|
179
193
|
stmt_text = f"""
|
180
|
-
SELECT
|
194
|
+
SELECT
|
195
|
+
m.*,
|
196
|
+
g.name as group_name,
|
197
|
+
g.uuid as group_uuid,
|
198
|
+
g.additional_ctx_json as group_additional_ctx_json
|
199
|
+
FROM
|
200
|
+
ctx_meta m
|
201
|
+
LEFT JOIN
|
202
|
+
ctx_group g ON m.group_id = g.id
|
203
|
+
WHERE
|
204
|
+
indexed_ts > 0
|
181
205
|
"""
|
182
206
|
stmt = text(stmt_text)
|
183
207
|
items = {}
|
@@ -431,7 +455,26 @@ class Storage:
|
|
431
455
|
)
|
432
456
|
with db.begin() as conn:
|
433
457
|
conn.execute(stmt)
|
434
|
-
|
458
|
+
|
459
|
+
# update group
|
460
|
+
if meta.group:
|
461
|
+
stmt = text("""
|
462
|
+
UPDATE ctx_group
|
463
|
+
SET
|
464
|
+
name = :name,
|
465
|
+
additional_ctx_json = :additional_ctx_json,
|
466
|
+
updated_ts = :updated_ts
|
467
|
+
WHERE id = :id
|
468
|
+
""").bindparams(
|
469
|
+
id=meta.group.id,
|
470
|
+
name=meta.group.name,
|
471
|
+
additional_ctx_json=pack_item_value(meta.group.additional_ctx),
|
472
|
+
updated_ts=int(time.time()),
|
473
|
+
)
|
474
|
+
with db.begin() as conn:
|
475
|
+
conn.execute(stmt)
|
476
|
+
|
477
|
+
return True
|
435
478
|
|
436
479
|
def update_meta_all(
|
437
480
|
self,
|
@@ -125,7 +125,10 @@ def unpack_item_value(value: Any) -> Any:
|
|
125
125
|
return value
|
126
126
|
|
127
127
|
|
128
|
-
def unpack_item(
|
128
|
+
def unpack_item(
|
129
|
+
item: CtxItem,
|
130
|
+
row: Dict[str, Any]
|
131
|
+
) -> CtxItem:
|
129
132
|
"""
|
130
133
|
Unpack context item from DB row
|
131
134
|
|
@@ -185,9 +188,12 @@ def unpack_item(item: CtxItem, row: Dict[str, Any]) -> CtxItem:
|
|
185
188
|
return item
|
186
189
|
|
187
190
|
|
188
|
-
def unpack_meta(
|
191
|
+
def unpack_meta(
|
192
|
+
meta: CtxMeta,
|
193
|
+
row: Dict[str, Any]
|
194
|
+
) -> CtxMeta:
|
189
195
|
"""
|
190
|
-
Unpack context meta
|
196
|
+
Unpack context meta-data from DB row
|
191
197
|
|
192
198
|
:param meta: Context meta (CtxMeta)
|
193
199
|
:param row: DB row
|
@@ -221,20 +227,38 @@ def unpack_meta(meta: CtxMeta, row: Dict[str, Any]) -> CtxMeta:
|
|
221
227
|
|
222
228
|
if meta.additional_ctx is None:
|
223
229
|
meta.additional_ctx = []
|
230
|
+
|
231
|
+
# add group if exists
|
232
|
+
if meta.group_id:
|
233
|
+
group = CtxGroup()
|
234
|
+
group.id = meta.group_id
|
235
|
+
group.uuid = row['group_uuid']
|
236
|
+
group.name = row['group_name']
|
237
|
+
group.additional_ctx = unpack_item_value(row['group_additional_ctx_json'])
|
238
|
+
if group.additional_ctx is None:
|
239
|
+
group.additional_ctx = []
|
240
|
+
meta.group = group
|
241
|
+
|
224
242
|
return meta
|
225
243
|
|
226
244
|
|
227
|
-
def unpack_group(
|
245
|
+
def unpack_group(
|
246
|
+
group: CtxGroup,
|
247
|
+
row: Dict[str, Any]
|
248
|
+
) -> CtxGroup:
|
228
249
|
"""
|
229
250
|
Unpack context group data from DB row
|
230
251
|
|
231
252
|
:param group: Context group (CtxGroup)
|
232
253
|
:param row: DB row
|
233
|
-
:return: context
|
254
|
+
:return: context group
|
234
255
|
"""
|
235
256
|
group.id = unpack_var(row['id'], 'int')
|
236
257
|
group.uuid = row['uuid']
|
237
258
|
group.created = unpack_var(row['created_ts'], 'int')
|
238
259
|
group.updated = unpack_var(row['updated_ts'], 'int')
|
239
260
|
group.name = row['name']
|
261
|
+
group.additional_ctx = unpack_item_value(row['additional_ctx_json'])
|
262
|
+
if group.additional_ctx is None:
|
263
|
+
group.additional_ctx = []
|
240
264
|
return group
|
@@ -22,7 +22,9 @@ class BaseLoader:
|
|
22
22
|
self.instructions = [] # list of instructions for 'web_index' command for how to handle this type
|
23
23
|
self.args = {} # custom keyword arguments
|
24
24
|
self.init_args = {} # initial keyword arguments
|
25
|
+
self.init_args_labels = {}
|
25
26
|
self.init_args_types = {}
|
27
|
+
self.init_args_desc = {}
|
26
28
|
self.allow_compiled = True # allow in compiled and Snap versions
|
27
29
|
# This is required due to some readers may require Python environment to install additional packages
|
28
30
|
|
@@ -42,6 +44,18 @@ class BaseLoader:
|
|
42
44
|
"""
|
43
45
|
self.args = args
|
44
46
|
|
47
|
+
def explode(self, value: str) -> list:
|
48
|
+
"""
|
49
|
+
Explode list from string
|
50
|
+
|
51
|
+
:param value: value string
|
52
|
+
:return: list
|
53
|
+
"""
|
54
|
+
if value:
|
55
|
+
items = value.split(",")
|
56
|
+
return [item.strip() for item in items]
|
57
|
+
return []
|
58
|
+
|
45
59
|
def get_args(self):
|
46
60
|
"""
|
47
61
|
Prepare keyword arguments for reader init method
|
@@ -15,6 +15,7 @@ class YoutubeTranscriptReader(BasePydanticReader):
|
|
15
15
|
"""Youtube Transcript reader."""
|
16
16
|
|
17
17
|
is_remote: bool = True
|
18
|
+
languages: List[str] = ["en"]
|
18
19
|
|
19
20
|
@classmethod
|
20
21
|
def class_name(cls) -> str:
|
@@ -34,6 +35,10 @@ class YoutubeTranscriptReader(BasePydanticReader):
|
|
34
35
|
for which transcripts are to be read.
|
35
36
|
|
36
37
|
"""
|
38
|
+
languages = self.languages
|
39
|
+
if not languages:
|
40
|
+
languages = ["en"]
|
41
|
+
|
37
42
|
results = []
|
38
43
|
for link in ytlinks:
|
39
44
|
video_id = self._extract_video_id(link)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,14 +30,16 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"query": {
|
32
32
|
"type": "str",
|
33
|
+
"label": "SQL query",
|
34
|
+
"description": "SQL query to read data from database, e.g. SELECT * FROM table",
|
33
35
|
},
|
34
36
|
},
|
35
37
|
}
|
36
38
|
}
|
37
39
|
]
|
38
40
|
self.init_args = {
|
39
|
-
"sql_database": None,
|
40
|
-
"engine": None,
|
41
|
+
# "sql_database": None,
|
42
|
+
# "engine": None,
|
41
43
|
"uri": None,
|
42
44
|
"scheme": None,
|
43
45
|
"host": None,
|
@@ -47,8 +49,8 @@ class Loader(BaseLoader):
|
|
47
49
|
"dbname": None,
|
48
50
|
}
|
49
51
|
self.init_args_types = {
|
50
|
-
"sql_database": "str",
|
51
|
-
"engine": "str",
|
52
|
+
# "sql_database": "str",
|
53
|
+
# "engine": "str",
|
52
54
|
"uri": "str",
|
53
55
|
"scheme": "str",
|
54
56
|
"host": "str",
|
@@ -57,6 +59,12 @@ class Loader(BaseLoader):
|
|
57
59
|
"password": "str",
|
58
60
|
"dbname": "str",
|
59
61
|
}
|
62
|
+
self.init_args_desc = {
|
63
|
+
# "sql_database": "str",
|
64
|
+
# "engine": "str",
|
65
|
+
"uri": "You can provide a single URI in the form of: {scheme}://{user}:{password}@{host}:{port}/{dbname}, "
|
66
|
+
"or you can provide each field manually:",
|
67
|
+
}
|
60
68
|
|
61
69
|
def get(self) -> BaseReader:
|
62
70
|
"""
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -103,8 +103,12 @@ class Loader(BaseLoader):
|
|
103
103
|
if "label_filters_include" in kwargs and kwargs.get("label_filters_include"):
|
104
104
|
if isinstance(kwargs.get("label_filters_include"), list):
|
105
105
|
args["label_filters_include"] = kwargs.get("label_filters_include")
|
106
|
+
elif isinstance(kwargs.get("label_filters_include"), str):
|
107
|
+
args["label_filters_include"] = self.explode(kwargs.get("label_filters_include"))
|
106
108
|
if "label_filters_exclude" in kwargs and kwargs.get("label_filters_exclude"):
|
107
109
|
if isinstance(kwargs.get("label_filters_exclude"), list):
|
108
110
|
args["label_filters_exclude"] = kwargs.get("label_filters_exclude")
|
111
|
+
elif isinstance(kwargs.get("label_filters_exclude"), str):
|
112
|
+
args["label_filters_exclude"] = self.explode(kwargs.get("label_filters_exclude"))
|
109
113
|
|
110
114
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,9 +30,13 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"number_of_results": {
|
32
32
|
"type": "int",
|
33
|
+
"label": "Number of results",
|
34
|
+
"description": "Number of results to fetch, default: 100",
|
33
35
|
},
|
34
36
|
"start_date": {
|
35
37
|
"type": "str",
|
38
|
+
"label": "Start date",
|
39
|
+
"description": "Start date for events, ISO format: YYYY-MM-DD, default: today",
|
36
40
|
},
|
37
41
|
},
|
38
42
|
}
|
@@ -79,8 +83,12 @@ class Loader(BaseLoader):
|
|
79
83
|
if "number_of_results" in kwargs and kwargs.get("number_of_results"):
|
80
84
|
if isinstance(kwargs.get("number_of_results"), int):
|
81
85
|
args["number_of_results"] = kwargs.get("number_of_results") # number of results
|
86
|
+
if args["number_of_results"] == 0:
|
87
|
+
args["number_of_results"] = None
|
82
88
|
|
83
89
|
if "start_date" in kwargs and kwargs.get("start_date"):
|
84
90
|
if isinstance(kwargs.get("start_date"), str):
|
85
91
|
args["start_date"] = kwargs.get("start_date") # start date
|
92
|
+
if args["start_date"] == "today" or args["start_date"] == "":
|
93
|
+
args["start_date"] = None
|
86
94
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,6 +30,9 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"document_ids": {
|
32
32
|
"type": "list",
|
33
|
+
"label": "Document IDs",
|
34
|
+
"description": "List of document IDs to index, separated by comma (,)",
|
35
|
+
"required": True,
|
33
36
|
},
|
34
37
|
},
|
35
38
|
}
|
@@ -76,4 +79,6 @@ class Loader(BaseLoader):
|
|
76
79
|
if "document_ids" in kwargs and kwargs.get("document_ids"):
|
77
80
|
if isinstance(kwargs.get("document_ids"), list):
|
78
81
|
args["document_ids"] = kwargs.get("document_ids") # list of document ids
|
82
|
+
elif isinstance(kwargs.get("document_ids"), str):
|
83
|
+
args["document_ids"] = self.explode(kwargs.get("document_ids"))
|
79
84
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,12 +30,17 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"folder_id": {
|
32
32
|
"type": "str",
|
33
|
+
"label": "Folder ID",
|
33
34
|
},
|
34
35
|
"file_ids": {
|
35
36
|
"type": "list",
|
37
|
+
"label": "File IDs",
|
38
|
+
"description": "List of file ids, separated by comma (,)",
|
36
39
|
},
|
37
40
|
"mime_types": {
|
38
41
|
"type": "list",
|
42
|
+
"label": "Mime Types",
|
43
|
+
"description": "List of mime types, separated by comma (,)",
|
39
44
|
},
|
40
45
|
},
|
41
46
|
}
|
@@ -98,8 +103,12 @@ class Loader(BaseLoader):
|
|
98
103
|
if "file_ids" in kwargs and kwargs.get("file_ids"):
|
99
104
|
if isinstance(kwargs.get("file_ids"), list):
|
100
105
|
args["file_ids"] = kwargs.get("file_ids") # list of file ids
|
106
|
+
elif isinstance(kwargs.get("file_ids"), str):
|
107
|
+
args["file_ids"] = self.explode(kwargs.get("file_ids"))
|
101
108
|
|
102
109
|
if "mime_types" in kwargs and kwargs.get("mime_types"):
|
103
110
|
if isinstance(kwargs.get("mime_types"), list):
|
104
111
|
args["mime_types"] = kwargs.get("mime_types") # list of mime types
|
112
|
+
elif isinstance(kwargs.get("mime_types"), str):
|
113
|
+
args["mime_types"] = self.explode(kwargs.get("mime_types"))
|
105
114
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,6 +30,7 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"query": {
|
32
32
|
"type": "str",
|
33
|
+
"label": "Query",
|
33
34
|
},
|
34
35
|
},
|
35
36
|
}
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,6 +30,8 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"document_ids": {
|
32
32
|
"type": "list",
|
33
|
+
"label": "Document IDs",
|
34
|
+
"description": "List of document ids, separated by comma (,)",
|
33
35
|
},
|
34
36
|
},
|
35
37
|
}
|
@@ -74,4 +76,6 @@ class Loader(BaseLoader):
|
|
74
76
|
if "document_ids" in kwargs and kwargs.get("document_ids"):
|
75
77
|
if isinstance(kwargs.get("document_ids"), list):
|
76
78
|
args["document_ids"] = kwargs.get("document_ids") # list of document ids
|
79
|
+
elif isinstance(kwargs.get("document_ids"), str):
|
80
|
+
args["document_ids"] = self.explode(kwargs.get("document_ids"))
|
77
81
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,6 +30,8 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"spreadsheet_ids": {
|
32
32
|
"type": "list",
|
33
|
+
"label": "Spreadsheet IDs",
|
34
|
+
"description": "List of spreadsheet ids, separated by comma (,)",
|
33
35
|
},
|
34
36
|
},
|
35
37
|
}
|
@@ -78,4 +80,6 @@ class Loader(BaseLoader):
|
|
78
80
|
if "spreadsheet_ids" in kwargs and kwargs.get("spreadsheet_ids"):
|
79
81
|
if isinstance(kwargs.get("spreadsheet_ids"), list):
|
80
82
|
args["spreadsheet_ids"] = kwargs.get("spreadsheet_ids") # spreadsheet ids
|
83
|
+
elif isinstance(kwargs.get("spreadsheet_ids"), str):
|
84
|
+
args["spreadsheet_ids"] = self.explode(kwargs.get("spreadsheet_ids"))
|
81
85
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,18 +30,26 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"folder_id": {
|
32
32
|
"type": "str",
|
33
|
+
"label": "Folder ID",
|
33
34
|
},
|
34
35
|
"file_ids": {
|
35
36
|
"type": "list",
|
37
|
+
"label": "File IDs",
|
38
|
+
"description": "List of file ids, separated by comma (,)",
|
36
39
|
},
|
37
40
|
"mime_types": {
|
38
41
|
"type": "list",
|
42
|
+
"label": "Mime Types",
|
43
|
+
"description": "List of mime types, separated by comma (,)",
|
39
44
|
},
|
40
45
|
"folder_path": {
|
41
46
|
"type": "str",
|
47
|
+
"label": "Folder Path",
|
42
48
|
},
|
43
49
|
"file_paths": {
|
44
50
|
"type": "list",
|
51
|
+
"label": "File Paths",
|
52
|
+
"description": "List of file paths, separated by comma (,)",
|
45
53
|
},
|
46
54
|
},
|
47
55
|
}
|
@@ -98,10 +106,14 @@ class Loader(BaseLoader):
|
|
98
106
|
if "file_ids" in kwargs and kwargs.get("file_ids"):
|
99
107
|
if isinstance(kwargs.get("file_ids"), list):
|
100
108
|
args["file_ids"] = kwargs.get("file_ids") # list of file ids
|
109
|
+
elif isinstance(kwargs.get("file_ids"), str):
|
110
|
+
args["file_ids"] = self.explode(kwargs.get("file_ids"))
|
101
111
|
|
102
112
|
if "mime_types" in kwargs and kwargs.get("mime_types"):
|
103
113
|
if isinstance(kwargs.get("mime_types"), list):
|
104
114
|
args["mime_types"] = kwargs.get("mime_types") # list of mime types
|
115
|
+
elif isinstance(kwargs.get("mime_types"), str):
|
116
|
+
args["mime_types"] = self.explode(kwargs.get("mime_types"))
|
105
117
|
|
106
118
|
if "folder_path" in kwargs and kwargs.get("folder_path"):
|
107
119
|
if isinstance(kwargs.get("folder_path"), str):
|
@@ -110,4 +122,6 @@ class Loader(BaseLoader):
|
|
110
122
|
if "file_paths" in kwargs and kwargs.get("file_paths"):
|
111
123
|
if isinstance(kwargs.get("file_paths"), list):
|
112
124
|
args["file_paths"] = kwargs.get("file_paths") # list of file paths
|
125
|
+
elif isinstance(kwargs.get("file_paths"), str):
|
126
|
+
args["file_paths"] = self.explode(kwargs.get("file_paths"))
|
113
127
|
return args
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
from llama_index.core.readers.base import BaseReader
|
@@ -19,7 +19,7 @@ class Loader(BaseLoader):
|
|
19
19
|
def __init__(self, *args, **kwargs):
|
20
20
|
super().__init__(*args, **kwargs)
|
21
21
|
self.id = "webpage"
|
22
|
-
self.name = "
|
22
|
+
self.name = "Webpage"
|
23
23
|
self.type = ["web"]
|
24
24
|
self.instructions = [
|
25
25
|
{
|
@@ -28,6 +28,8 @@ class Loader(BaseLoader):
|
|
28
28
|
"args": {
|
29
29
|
"url": {
|
30
30
|
"type": "str",
|
31
|
+
"label": "URL",
|
32
|
+
"description": "URL of the webpage to index, e.g. https://www.example.com",
|
31
33
|
},
|
32
34
|
},
|
33
35
|
}
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
from llama_index.core.readers.base import BaseReader
|
@@ -28,6 +28,7 @@ class Loader(BaseLoader):
|
|
28
28
|
"args": {
|
29
29
|
"url": {
|
30
30
|
"type": "str",
|
31
|
+
"label": "URL",
|
31
32
|
},
|
32
33
|
},
|
33
34
|
}
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
from llama_index.core.readers.base import BaseReader
|
@@ -28,6 +28,7 @@ class Loader(BaseLoader):
|
|
28
28
|
"args": {
|
29
29
|
"url": {
|
30
30
|
"type": "str",
|
31
|
+
"label": "URL",
|
31
32
|
},
|
32
33
|
},
|
33
34
|
}
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import json
|
@@ -30,9 +30,11 @@ class Loader(BaseLoader):
|
|
30
30
|
"args": {
|
31
31
|
"users": {
|
32
32
|
"type": "list",
|
33
|
+
"label": "Twitter/X usernames",
|
33
34
|
},
|
34
35
|
"max_tweets": {
|
35
36
|
"type": "int",
|
37
|
+
"label": "Max tweets",
|
36
38
|
},
|
37
39
|
},
|
38
40
|
}
|
@@ -80,7 +82,7 @@ class Loader(BaseLoader):
|
|
80
82
|
if isinstance(kwargs.get("users"), list):
|
81
83
|
args["twitterhandles"] = kwargs.get("users") # usernames
|
82
84
|
elif isinstance(kwargs.get("users"), str):
|
83
|
-
args["twitterhandles"] =
|
85
|
+
args["twitterhandles"] = self.explode(kwargs.get("users"))
|
84
86
|
|
85
87
|
if "max_tweets" in kwargs and kwargs.get("max_tweets"):
|
86
88
|
if isinstance(kwargs.get("max_tweets"), int):
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.
|
9
|
+
# Updated Date: 2024.12.16 01:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
from llama_index.core.readers.base import BaseReader
|
@@ -28,11 +28,25 @@ class Loader(BaseLoader):
|
|
28
28
|
"args": {
|
29
29
|
"url": {
|
30
30
|
"type": "str",
|
31
|
+
"label": "Youtube URL",
|
32
|
+
"description": "URL of the YouTube video, e.g. https://www.youtube.com/watch?v=CRRlbK5w8AE",
|
31
33
|
},
|
32
34
|
},
|
33
35
|
}
|
34
36
|
}
|
35
37
|
]
|
38
|
+
self.init_args = {
|
39
|
+
"languages": ["en"],
|
40
|
+
}
|
41
|
+
self.init_args_types = {
|
42
|
+
"languages": "list",
|
43
|
+
}
|
44
|
+
self.init_args_labels = {
|
45
|
+
"languages": "Languages",
|
46
|
+
}
|
47
|
+
self.init_args_desc = {
|
48
|
+
"languages": "List of languages to extract from the video, separated by comma (,), e.g. 'en,de,fr'. Default is 'en'",
|
49
|
+
}
|
36
50
|
|
37
51
|
def get(self) -> BaseReader:
|
38
52
|
"""
|
@@ -40,7 +54,8 @@ class Loader(BaseLoader):
|
|
40
54
|
|
41
55
|
:return: Data reader instance
|
42
56
|
"""
|
43
|
-
|
57
|
+
args = self.get_args()
|
58
|
+
return YoutubeTranscriptReader(**args)
|
44
59
|
|
45
60
|
def prepare_args(self, **kwargs) -> dict:
|
46
61
|
"""
|