pygpt-net 2.4.42__py3-none-any.whl → 2.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. CHANGELOG.md +11 -0
  2. README.md +17 -2
  3. pygpt_net/CHANGELOG.txt +11 -0
  4. pygpt_net/__init__.py +3 -3
  5. pygpt_net/controller/attachment.py +31 -3
  6. pygpt_net/controller/chat/attachment.py +37 -36
  7. pygpt_net/controller/config/placeholder.py +6 -4
  8. pygpt_net/controller/idx/common.py +7 -3
  9. pygpt_net/core/attachments/__init__.py +7 -2
  10. pygpt_net/core/attachments/context.py +52 -34
  11. pygpt_net/core/db/__init__.py +2 -1
  12. pygpt_net/core/debug/attachments.py +1 -0
  13. pygpt_net/core/idx/__init__.py +8 -3
  14. pygpt_net/core/idx/indexing.py +24 -7
  15. pygpt_net/core/idx/ui/__init__.py +22 -0
  16. pygpt_net/core/idx/ui/loaders.py +217 -0
  17. pygpt_net/data/config/config.json +4 -4
  18. pygpt_net/data/config/models.json +3 -3
  19. pygpt_net/data/config/modes.json +3 -3
  20. pygpt_net/data/config/settings.json +5 -5
  21. pygpt_net/data/locale/locale.de.ini +3 -3
  22. pygpt_net/data/locale/locale.en.ini +11 -9
  23. pygpt_net/data/locale/locale.es.ini +3 -3
  24. pygpt_net/data/locale/locale.fr.ini +3 -3
  25. pygpt_net/data/locale/locale.it.ini +3 -3
  26. pygpt_net/data/locale/locale.pl.ini +3 -3
  27. pygpt_net/data/locale/locale.uk.ini +3 -3
  28. pygpt_net/data/locale/locale.zh.ini +3 -3
  29. pygpt_net/data/locale/plugin.mailer.en.ini +5 -5
  30. pygpt_net/item/attachment.py +5 -1
  31. pygpt_net/item/ctx.py +99 -2
  32. pygpt_net/migrations/Version20241215110000.py +25 -0
  33. pygpt_net/migrations/__init__.py +3 -1
  34. pygpt_net/plugin/cmd_files/__init__.py +3 -2
  35. pygpt_net/provider/core/attachment/json_file.py +4 -1
  36. pygpt_net/provider/core/config/patch.py +6 -0
  37. pygpt_net/provider/core/ctx/db_sqlite/storage.py +50 -7
  38. pygpt_net/provider/core/ctx/db_sqlite/utils.py +29 -5
  39. pygpt_net/provider/loaders/base.py +14 -0
  40. pygpt_net/provider/loaders/hub/yt/base.py +5 -0
  41. pygpt_net/provider/loaders/web_database.py +13 -5
  42. pygpt_net/provider/loaders/web_github_issues.py +5 -1
  43. pygpt_net/provider/loaders/web_google_calendar.py +9 -1
  44. pygpt_net/provider/loaders/web_google_docs.py +6 -1
  45. pygpt_net/provider/loaders/web_google_drive.py +10 -1
  46. pygpt_net/provider/loaders/web_google_gmail.py +2 -1
  47. pygpt_net/provider/loaders/web_google_keep.py +5 -1
  48. pygpt_net/provider/loaders/web_google_sheets.py +5 -1
  49. pygpt_net/provider/loaders/web_microsoft_onedrive.py +15 -1
  50. pygpt_net/provider/loaders/web_page.py +4 -2
  51. pygpt_net/provider/loaders/web_rss.py +2 -1
  52. pygpt_net/provider/loaders/web_sitemap.py +2 -1
  53. pygpt_net/provider/loaders/web_twitter.py +4 -2
  54. pygpt_net/provider/loaders/web_yt.py +17 -2
  55. pygpt_net/provider/vector_stores/ctx_attachment.py +1 -1
  56. pygpt_net/tools/indexer/__init__.py +8 -40
  57. pygpt_net/tools/indexer/ui/web.py +20 -78
  58. pygpt_net/ui/layout/ctx/ctx_list.py +86 -18
  59. pygpt_net/ui/widget/dialog/url.py +151 -14
  60. pygpt_net/ui/widget/element/group.py +15 -2
  61. pygpt_net/ui/widget/lists/context.py +23 -9
  62. pygpt_net/utils.py +1 -1
  63. {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/METADATA +18 -3
  64. {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/RECORD +67 -64
  65. {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/LICENSE +0 -0
  66. {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/WHEEL +0 -0
  67. {pygpt_net-2.4.42.dist-info → pygpt_net-2.4.44.dist-info}/entry_points.txt +0 -0
@@ -68,6 +68,9 @@ class Storage:
68
68
  # only base by default
69
69
  where_clauses.append("(m.root_id IS NULL OR m.root_id = 0)")
70
70
 
71
+ # join group
72
+ join_clauses.append("LEFT JOIN ctx_group g ON m.group_id = g.id")
73
+
71
74
  # search_string
72
75
  if search_string:
73
76
  date_ranges = search_by_date_string(search_string)
@@ -106,15 +109,16 @@ class Storage:
106
109
  continue
107
110
  mode = filter.get('mode', '=')
108
111
  value = filter.get('value', '')
112
+ key_name = 'm.' + key
109
113
  if isinstance(value, int):
110
- where_clauses.append(f"{key} {mode} :{key}")
114
+ where_clauses.append(f"{key_name} {mode} :{key}")
111
115
  bind_params[key] = value
112
116
  elif isinstance(value, str):
113
- where_clauses.append(f"{key} {mode} :{key}")
117
+ where_clauses.append(f"{key_name} {mode} :{key}")
114
118
  bind_params[key] = f"%{value}%"
115
119
  elif isinstance(value, list):
116
120
  values = "(" + ",".join([str(x) for x in value]) + ")"
117
- where_clauses.append(f"{key} {mode} {values}")
121
+ where_clauses.append(f"{key_name} {mode} {values}")
118
122
 
119
123
  where_statement = " AND ".join(where_clauses) if where_clauses else "1"
120
124
  join_statement = " ".join(join_clauses) if join_clauses else ""
@@ -154,8 +158,18 @@ class Storage:
154
158
  append_date_ranges=True,
155
159
  )
156
160
  stmt_text = f"""
157
- SELECT m.* FROM ctx_meta m {join_statement} WHERE {where_statement}
158
- ORDER BY m.updated_ts DESC {limit_suffix}
161
+ SELECT
162
+ m.*,
163
+ g.name as group_name,
164
+ g.uuid as group_uuid,
165
+ g.additional_ctx_json as group_additional_ctx_json
166
+ FROM
167
+ ctx_meta m
168
+ {join_statement}
169
+ WHERE
170
+ {where_statement}
171
+ ORDER BY
172
+ m.updated_ts DESC {limit_suffix}
159
173
  """
160
174
  stmt = text(stmt_text).bindparams(**bind_params)
161
175
 
@@ -177,7 +191,17 @@ class Storage:
177
191
  :return: dict of CtxMeta
178
192
  """
179
193
  stmt_text = f"""
180
- SELECT * FROM ctx_meta WHERE indexed_ts > 0
194
+ SELECT
195
+ m.*,
196
+ g.name as group_name,
197
+ g.uuid as group_uuid,
198
+ g.additional_ctx_json as group_additional_ctx_json
199
+ FROM
200
+ ctx_meta m
201
+ LEFT JOIN
202
+ ctx_group g ON m.group_id = g.id
203
+ WHERE
204
+ indexed_ts > 0
181
205
  """
182
206
  stmt = text(stmt_text)
183
207
  items = {}
@@ -431,7 +455,26 @@ class Storage:
431
455
  )
432
456
  with db.begin() as conn:
433
457
  conn.execute(stmt)
434
- return True
458
+
459
+ # update group
460
+ if meta.group:
461
+ stmt = text("""
462
+ UPDATE ctx_group
463
+ SET
464
+ name = :name,
465
+ additional_ctx_json = :additional_ctx_json,
466
+ updated_ts = :updated_ts
467
+ WHERE id = :id
468
+ """).bindparams(
469
+ id=meta.group.id,
470
+ name=meta.group.name,
471
+ additional_ctx_json=pack_item_value(meta.group.additional_ctx),
472
+ updated_ts=int(time.time()),
473
+ )
474
+ with db.begin() as conn:
475
+ conn.execute(stmt)
476
+
477
+ return True
435
478
 
436
479
  def update_meta_all(
437
480
  self,
@@ -125,7 +125,10 @@ def unpack_item_value(value: Any) -> Any:
125
125
  return value
126
126
 
127
127
 
128
- def unpack_item(item: CtxItem, row: Dict[str, Any]) -> CtxItem:
128
+ def unpack_item(
129
+ item: CtxItem,
130
+ row: Dict[str, Any]
131
+ ) -> CtxItem:
129
132
  """
130
133
  Unpack context item from DB row
131
134
 
@@ -185,9 +188,12 @@ def unpack_item(item: CtxItem, row: Dict[str, Any]) -> CtxItem:
185
188
  return item
186
189
 
187
190
 
188
- def unpack_meta(meta: CtxMeta, row: Dict[str, Any]) -> CtxMeta:
191
+ def unpack_meta(
192
+ meta: CtxMeta,
193
+ row: Dict[str, Any]
194
+ ) -> CtxMeta:
189
195
  """
190
- Unpack context meta data from DB row
196
+ Unpack context meta-data from DB row
191
197
 
192
198
  :param meta: Context meta (CtxMeta)
193
199
  :param row: DB row
@@ -221,20 +227,38 @@ def unpack_meta(meta: CtxMeta, row: Dict[str, Any]) -> CtxMeta:
221
227
 
222
228
  if meta.additional_ctx is None:
223
229
  meta.additional_ctx = []
230
+
231
+ # add group if exists
232
+ if meta.group_id:
233
+ group = CtxGroup()
234
+ group.id = meta.group_id
235
+ group.uuid = row['group_uuid']
236
+ group.name = row['group_name']
237
+ group.additional_ctx = unpack_item_value(row['group_additional_ctx_json'])
238
+ if group.additional_ctx is None:
239
+ group.additional_ctx = []
240
+ meta.group = group
241
+
224
242
  return meta
225
243
 
226
244
 
227
- def unpack_group(group: CtxGroup, row: Dict[str, Any]) -> CtxGroup:
245
+ def unpack_group(
246
+ group: CtxGroup,
247
+ row: Dict[str, Any]
248
+ ) -> CtxGroup:
228
249
  """
229
250
  Unpack context group data from DB row
230
251
 
231
252
  :param group: Context group (CtxGroup)
232
253
  :param row: DB row
233
- :return: context meta
254
+ :return: context group
234
255
  """
235
256
  group.id = unpack_var(row['id'], 'int')
236
257
  group.uuid = row['uuid']
237
258
  group.created = unpack_var(row['created_ts'], 'int')
238
259
  group.updated = unpack_var(row['updated_ts'], 'int')
239
260
  group.name = row['name']
261
+ group.additional_ctx = unpack_item_value(row['additional_ctx_json'])
262
+ if group.additional_ctx is None:
263
+ group.additional_ctx = []
240
264
  return group
@@ -22,7 +22,9 @@ class BaseLoader:
22
22
  self.instructions = [] # list of instructions for 'web_index' command for how to handle this type
23
23
  self.args = {} # custom keyword arguments
24
24
  self.init_args = {} # initial keyword arguments
25
+ self.init_args_labels = {}
25
26
  self.init_args_types = {}
27
+ self.init_args_desc = {}
26
28
  self.allow_compiled = True # allow in compiled and Snap versions
27
29
  # This is required due to some readers may require Python environment to install additional packages
28
30
 
@@ -42,6 +44,18 @@ class BaseLoader:
42
44
  """
43
45
  self.args = args
44
46
 
47
+ def explode(self, value: str) -> list:
48
+ """
49
+ Explode list from string
50
+
51
+ :param value: value string
52
+ :return: list
53
+ """
54
+ if value:
55
+ items = value.split(",")
56
+ return [item.strip() for item in items]
57
+ return []
58
+
45
59
  def get_args(self):
46
60
  """
47
61
  Prepare keyword arguments for reader init method
@@ -15,6 +15,7 @@ class YoutubeTranscriptReader(BasePydanticReader):
15
15
  """Youtube Transcript reader."""
16
16
 
17
17
  is_remote: bool = True
18
+ languages: List[str] = ["en"]
18
19
 
19
20
  @classmethod
20
21
  def class_name(cls) -> str:
@@ -34,6 +35,10 @@ class YoutubeTranscriptReader(BasePydanticReader):
34
35
  for which transcripts are to be read.
35
36
 
36
37
  """
38
+ languages = self.languages
39
+ if not languages:
40
+ languages = ["en"]
41
+
37
42
  results = []
38
43
  for link in ytlinks:
39
44
  video_id = self._extract_video_id(link)
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,14 +30,16 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "query": {
32
32
  "type": "str",
33
+ "label": "SQL query",
34
+ "description": "SQL query to read data from database, e.g. SELECT * FROM table",
33
35
  },
34
36
  },
35
37
  }
36
38
  }
37
39
  ]
38
40
  self.init_args = {
39
- "sql_database": None,
40
- "engine": None,
41
+ # "sql_database": None,
42
+ # "engine": None,
41
43
  "uri": None,
42
44
  "scheme": None,
43
45
  "host": None,
@@ -47,8 +49,8 @@ class Loader(BaseLoader):
47
49
  "dbname": None,
48
50
  }
49
51
  self.init_args_types = {
50
- "sql_database": "str",
51
- "engine": "str",
52
+ # "sql_database": "str",
53
+ # "engine": "str",
52
54
  "uri": "str",
53
55
  "scheme": "str",
54
56
  "host": "str",
@@ -57,6 +59,12 @@ class Loader(BaseLoader):
57
59
  "password": "str",
58
60
  "dbname": "str",
59
61
  }
62
+ self.init_args_desc = {
63
+ # "sql_database": "str",
64
+ # "engine": "str",
65
+ "uri": "You can provide a single URI in the form of: {scheme}://{user}:{password}@{host}:{port}/{dbname}, "
66
+ "or you can provide each field manually:",
67
+ }
60
68
 
61
69
  def get(self) -> BaseReader:
62
70
  """
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -103,8 +103,12 @@ class Loader(BaseLoader):
103
103
  if "label_filters_include" in kwargs and kwargs.get("label_filters_include"):
104
104
  if isinstance(kwargs.get("label_filters_include"), list):
105
105
  args["label_filters_include"] = kwargs.get("label_filters_include")
106
+ elif isinstance(kwargs.get("label_filters_include"), str):
107
+ args["label_filters_include"] = self.explode(kwargs.get("label_filters_include"))
106
108
  if "label_filters_exclude" in kwargs and kwargs.get("label_filters_exclude"):
107
109
  if isinstance(kwargs.get("label_filters_exclude"), list):
108
110
  args["label_filters_exclude"] = kwargs.get("label_filters_exclude")
111
+ elif isinstance(kwargs.get("label_filters_exclude"), str):
112
+ args["label_filters_exclude"] = self.explode(kwargs.get("label_filters_exclude"))
109
113
 
110
114
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,9 +30,13 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "number_of_results": {
32
32
  "type": "int",
33
+ "label": "Number of results",
34
+ "description": "Number of results to fetch, default: 100",
33
35
  },
34
36
  "start_date": {
35
37
  "type": "str",
38
+ "label": "Start date",
39
+ "description": "Start date for events, ISO format: YYYY-MM-DD, default: today",
36
40
  },
37
41
  },
38
42
  }
@@ -79,8 +83,12 @@ class Loader(BaseLoader):
79
83
  if "number_of_results" in kwargs and kwargs.get("number_of_results"):
80
84
  if isinstance(kwargs.get("number_of_results"), int):
81
85
  args["number_of_results"] = kwargs.get("number_of_results") # number of results
86
+ if args["number_of_results"] == 0:
87
+ args["number_of_results"] = None
82
88
 
83
89
  if "start_date" in kwargs and kwargs.get("start_date"):
84
90
  if isinstance(kwargs.get("start_date"), str):
85
91
  args["start_date"] = kwargs.get("start_date") # start date
92
+ if args["start_date"] == "today" or args["start_date"] == "":
93
+ args["start_date"] = None
86
94
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,6 +30,9 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "document_ids": {
32
32
  "type": "list",
33
+ "label": "Document IDs",
34
+ "description": "List of document IDs to index, separated by comma (,)",
35
+ "required": True,
33
36
  },
34
37
  },
35
38
  }
@@ -76,4 +79,6 @@ class Loader(BaseLoader):
76
79
  if "document_ids" in kwargs and kwargs.get("document_ids"):
77
80
  if isinstance(kwargs.get("document_ids"), list):
78
81
  args["document_ids"] = kwargs.get("document_ids") # list of document ids
82
+ elif isinstance(kwargs.get("document_ids"), str):
83
+ args["document_ids"] = self.explode(kwargs.get("document_ids"))
79
84
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.08.19 20:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,12 +30,17 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "folder_id": {
32
32
  "type": "str",
33
+ "label": "Folder ID",
33
34
  },
34
35
  "file_ids": {
35
36
  "type": "list",
37
+ "label": "File IDs",
38
+ "description": "List of file ids, separated by comma (,)",
36
39
  },
37
40
  "mime_types": {
38
41
  "type": "list",
42
+ "label": "Mime Types",
43
+ "description": "List of mime types, separated by comma (,)",
39
44
  },
40
45
  },
41
46
  }
@@ -98,8 +103,12 @@ class Loader(BaseLoader):
98
103
  if "file_ids" in kwargs and kwargs.get("file_ids"):
99
104
  if isinstance(kwargs.get("file_ids"), list):
100
105
  args["file_ids"] = kwargs.get("file_ids") # list of file ids
106
+ elif isinstance(kwargs.get("file_ids"), str):
107
+ args["file_ids"] = self.explode(kwargs.get("file_ids"))
101
108
 
102
109
  if "mime_types" in kwargs and kwargs.get("mime_types"):
103
110
  if isinstance(kwargs.get("mime_types"), list):
104
111
  args["mime_types"] = kwargs.get("mime_types") # list of mime types
112
+ elif isinstance(kwargs.get("mime_types"), str):
113
+ args["mime_types"] = self.explode(kwargs.get("mime_types"))
105
114
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,6 +30,7 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "query": {
32
32
  "type": "str",
33
+ "label": "Query",
33
34
  },
34
35
  },
35
36
  }
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,6 +30,8 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "document_ids": {
32
32
  "type": "list",
33
+ "label": "Document IDs",
34
+ "description": "List of document ids, separated by comma (,)",
33
35
  },
34
36
  },
35
37
  }
@@ -74,4 +76,6 @@ class Loader(BaseLoader):
74
76
  if "document_ids" in kwargs and kwargs.get("document_ids"):
75
77
  if isinstance(kwargs.get("document_ids"), list):
76
78
  args["document_ids"] = kwargs.get("document_ids") # list of document ids
79
+ elif isinstance(kwargs.get("document_ids"), str):
80
+ args["document_ids"] = self.explode(kwargs.get("document_ids"))
77
81
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,6 +30,8 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "spreadsheet_ids": {
32
32
  "type": "list",
33
+ "label": "Spreadsheet IDs",
34
+ "description": "List of spreadsheet ids, separated by comma (,)",
33
35
  },
34
36
  },
35
37
  }
@@ -78,4 +80,6 @@ class Loader(BaseLoader):
78
80
  if "spreadsheet_ids" in kwargs and kwargs.get("spreadsheet_ids"):
79
81
  if isinstance(kwargs.get("spreadsheet_ids"), list):
80
82
  args["spreadsheet_ids"] = kwargs.get("spreadsheet_ids") # spreadsheet ids
83
+ elif isinstance(kwargs.get("spreadsheet_ids"), str):
84
+ args["spreadsheet_ids"] = self.explode(kwargs.get("spreadsheet_ids"))
81
85
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,18 +30,26 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "folder_id": {
32
32
  "type": "str",
33
+ "label": "Folder ID",
33
34
  },
34
35
  "file_ids": {
35
36
  "type": "list",
37
+ "label": "File IDs",
38
+ "description": "List of file ids, separated by comma (,)",
36
39
  },
37
40
  "mime_types": {
38
41
  "type": "list",
42
+ "label": "Mime Types",
43
+ "description": "List of mime types, separated by comma (,)",
39
44
  },
40
45
  "folder_path": {
41
46
  "type": "str",
47
+ "label": "Folder Path",
42
48
  },
43
49
  "file_paths": {
44
50
  "type": "list",
51
+ "label": "File Paths",
52
+ "description": "List of file paths, separated by comma (,)",
45
53
  },
46
54
  },
47
55
  }
@@ -98,10 +106,14 @@ class Loader(BaseLoader):
98
106
  if "file_ids" in kwargs and kwargs.get("file_ids"):
99
107
  if isinstance(kwargs.get("file_ids"), list):
100
108
  args["file_ids"] = kwargs.get("file_ids") # list of file ids
109
+ elif isinstance(kwargs.get("file_ids"), str):
110
+ args["file_ids"] = self.explode(kwargs.get("file_ids"))
101
111
 
102
112
  if "mime_types" in kwargs and kwargs.get("mime_types"):
103
113
  if isinstance(kwargs.get("mime_types"), list):
104
114
  args["mime_types"] = kwargs.get("mime_types") # list of mime types
115
+ elif isinstance(kwargs.get("mime_types"), str):
116
+ args["mime_types"] = self.explode(kwargs.get("mime_types"))
105
117
 
106
118
  if "folder_path" in kwargs and kwargs.get("folder_path"):
107
119
  if isinstance(kwargs.get("folder_path"), str):
@@ -110,4 +122,6 @@ class Loader(BaseLoader):
110
122
  if "file_paths" in kwargs and kwargs.get("file_paths"):
111
123
  if isinstance(kwargs.get("file_paths"), list):
112
124
  args["file_paths"] = kwargs.get("file_paths") # list of file paths
125
+ elif isinstance(kwargs.get("file_paths"), str):
126
+ args["file_paths"] = self.explode(kwargs.get("file_paths"))
113
127
  return args
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.03.12 06:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from llama_index.core.readers.base import BaseReader
@@ -19,7 +19,7 @@ class Loader(BaseLoader):
19
19
  def __init__(self, *args, **kwargs):
20
20
  super().__init__(*args, **kwargs)
21
21
  self.id = "webpage"
22
- self.name = "Webpages"
22
+ self.name = "Webpage"
23
23
  self.type = ["web"]
24
24
  self.instructions = [
25
25
  {
@@ -28,6 +28,8 @@ class Loader(BaseLoader):
28
28
  "args": {
29
29
  "url": {
30
30
  "type": "str",
31
+ "label": "URL",
32
+ "description": "URL of the webpage to index, e.g. https://www.example.com",
31
33
  },
32
34
  },
33
35
  }
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.03.12 06:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from llama_index.core.readers.base import BaseReader
@@ -28,6 +28,7 @@ class Loader(BaseLoader):
28
28
  "args": {
29
29
  "url": {
30
30
  "type": "str",
31
+ "label": "URL",
31
32
  },
32
33
  },
33
34
  }
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from llama_index.core.readers.base import BaseReader
@@ -28,6 +28,7 @@ class Loader(BaseLoader):
28
28
  "args": {
29
29
  "url": {
30
30
  "type": "str",
31
+ "label": "URL",
31
32
  },
32
33
  },
33
34
  }
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.04.17 01:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -30,9 +30,11 @@ class Loader(BaseLoader):
30
30
  "args": {
31
31
  "users": {
32
32
  "type": "list",
33
+ "label": "Twitter/X usernames",
33
34
  },
34
35
  "max_tweets": {
35
36
  "type": "int",
37
+ "label": "Max tweets",
36
38
  },
37
39
  },
38
40
  }
@@ -80,7 +82,7 @@ class Loader(BaseLoader):
80
82
  if isinstance(kwargs.get("users"), list):
81
83
  args["twitterhandles"] = kwargs.get("users") # usernames
82
84
  elif isinstance(kwargs.get("users"), str):
83
- args["twitterhandles"] = [kwargs.get("users")]
85
+ args["twitterhandles"] = self.explode(kwargs.get("users"))
84
86
 
85
87
  if "max_tweets" in kwargs and kwargs.get("max_tweets"):
86
88
  if isinstance(kwargs.get("max_tweets"), int):
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.11.26 04:00:00 #
9
+ # Updated Date: 2024.12.16 01:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from llama_index.core.readers.base import BaseReader
@@ -28,11 +28,25 @@ class Loader(BaseLoader):
28
28
  "args": {
29
29
  "url": {
30
30
  "type": "str",
31
+ "label": "Youtube URL",
32
+ "description": "URL of the YouTube video, e.g. https://www.youtube.com/watch?v=CRRlbK5w8AE",
31
33
  },
32
34
  },
33
35
  }
34
36
  }
35
37
  ]
38
+ self.init_args = {
39
+ "languages": ["en"],
40
+ }
41
+ self.init_args_types = {
42
+ "languages": "list",
43
+ }
44
+ self.init_args_labels = {
45
+ "languages": "Languages",
46
+ }
47
+ self.init_args_desc = {
48
+ "languages": "List of languages to extract from the video, separated by comma (,), e.g. 'en,de,fr'. Default is 'en'",
49
+ }
36
50
 
37
51
  def get(self) -> BaseReader:
38
52
  """
@@ -40,7 +54,8 @@ class Loader(BaseLoader):
40
54
 
41
55
  :return: Data reader instance
42
56
  """
43
- return YoutubeTranscriptReader()
57
+ args = self.get_args()
58
+ return YoutubeTranscriptReader(**args)
44
59
 
45
60
  def prepare_args(self, **kwargs) -> dict:
46
61
  """
@@ -50,7 +50,7 @@ class CtxAttachmentProvider(BaseStore):
50
50
 
51
51
  :return: True if exists
52
52
  """
53
- path = self.get_path()
53
+ path = self.get_path("")
54
54
  if os.path.exists(path):
55
55
  store = os.path.join(path, "docstore.json")
56
56
  if os.path.exists(store):