mixpeek 0.6.29__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mixpeek/client.py CHANGED
@@ -1,11 +1,10 @@
1
1
  import requests
2
2
 
3
- from .endpoints.extract import Extract
4
3
  from .endpoints.embed import Embed
5
- from .endpoints.generate import Generate
6
- from .endpoints.connections import Connections
4
+ from .endpoints.collections import Collections
5
+ from .endpoints.index import Index
6
+ from .endpoints.search import Search
7
7
  from .endpoints.tools import Tools
8
- from .endpoints.pipelines import Pipelines
9
8
 
10
9
 
11
10
  class Mixpeek:
@@ -16,11 +15,11 @@ class Mixpeek:
16
15
  "Authorization": f"Bearer {self.api_key}",
17
16
  "Content-Type": "application/json"
18
17
  }
19
- self.extract = Extract(self.base_url, self.headers)
18
+ # these are remote
20
19
  self.embed = Embed(self.base_url, self.headers)
21
- self.generate = Generate(self.base_url, self.headers)
20
+ self.collections = Collections(self.base_url, self.headers)
21
+ self.index = Index(self.base_url, self.headers)
22
+ self.search = Search(self.base_url, self.headers)
22
23
 
23
- # we include api_key as well because the header is different for upload
24
- self.connections = Connections(self.base_url, self.headers, self.api_key)
25
- self.tools = Tools(self.base_url, self.headers)
26
- self.pipelines = Pipelines(self.base_url, self.headers)
24
+ # tools is all local
25
+ self.tools = Tools()
@@ -0,0 +1,77 @@
1
+ import requests
2
+
3
class Collections:
    """Client for the ``collections/`` routes of the API.

    Every public method returns the decoded JSON body of the response, or a
    dict of the form ``{"error": "<message>"}`` when ``requests`` raises a
    ``RequestException`` (``raise_for_status`` is called on every response).
    """

    def __init__(self, base_url, headers):
        # base_url is concatenated directly with the route, so it is expected
        # to already end with a slash.
        self.base_url = base_url
        self.headers = headers

    def _send(self, verb, route, payload=None):
        """Call ``verb`` (a ``requests`` function) on ``base_url + route``.

        ``payload`` is sent as the JSON body when given; otherwise no body
        argument is passed at all.
        """
        options = {"headers": self.headers}
        if payload is not None:
            options["json"] = payload
        try:
            reply = verb(f"{self.base_url}{route}", **options)
            reply.raise_for_status()
            return reply.json()
        except requests.RequestException as exc:
            return {"error": str(exc)}

    def list_files(
        self,
        collection_id,
        randomize=False,
        page=1,
        page_size=10,
        filters=None,
        sort_by=None,
        sort_order="asc",
    ):
        """POST the listing options for ``collection_id`` to ``collections/``."""
        body = {
            "collection_id": collection_id,
            "randomize": randomize,
            "page": page,
            "page_size": page_size,
            "filters": filters,
            "sort_by": sort_by,
            "sort_order": sort_order,
        }
        return self._send(requests.post, "collections/", body)

    def list_collections(self):
        """GET ``collections/``."""
        return self._send(requests.get, "collections/")

    def search_files(self, query, collection_id, page=1, page_size=10, sort_by=None, sort_order="asc"):
        """POST a file search for ``query`` to ``collections/search``."""
        body = {
            "query": query,
            "collection_id": collection_id,
            "page": page,
            "page_size": page_size,
            "sort_by": sort_by,
            "sort_order": sort_order,
        }
        return self._send(requests.post, "collections/search", body)

    def get_file_by_id(self, file_id):
        """GET ``collections/file/<file_id>``."""
        return self._send(requests.get, f"collections/file/{file_id}")

    def delete_file_by_id(self, file_id):
        """DELETE ``collections/file/<file_id>``."""
        return self._send(requests.delete, f"collections/file/{file_id}")

    def delete_collection(self, collection_id):
        """DELETE ``collections/<collection_id>``."""
        return self._send(requests.delete, f"collections/{collection_id}")
@@ -6,45 +6,61 @@ class Embed:
6
6
  self.headers = headers
7
7
 
8
8
  def video(self, model_id: str, input: str, input_type: str):
9
- url = f"{self.base_url}embed/"
10
- data = {
11
- "modality": "video",
12
- "model_id": model_id,
13
- "input": input,
14
- "input_type": input_type
15
- }
16
- response = requests.post(url, json=data, headers=self.headers)
17
- return response.json()
9
+ try:
10
+ url = f"{self.base_url}embed/"
11
+ data = {
12
+ "modality": "video",
13
+ "model_id": model_id,
14
+ "input": input,
15
+ "input_type": input_type
16
+ }
17
+ response = requests.post(url, json=data, headers=self.headers)
18
+ response.raise_for_status()
19
+ return response.json()
20
+ except requests.RequestException as e:
21
+ return {"error": str(e)}
18
22
 
19
23
  def text(self, model_id: str, input: str, input_type: str):
20
- url = f"{self.base_url}embed/"
21
- data = {
22
- "modality": "text",
23
- "model_id": model_id,
24
- "input": input,
25
- "input_type": input_type
26
- }
27
- response = requests.post(url, json=data, headers=self.headers)
28
- return response.json()
24
+ try:
25
+ url = f"{self.base_url}embed/"
26
+ data = {
27
+ "modality": "text",
28
+ "model_id": model_id,
29
+ "input": input,
30
+ "input_type": input_type
31
+ }
32
+ response = requests.post(url, json=data, headers=self.headers)
33
+ response.raise_for_status()
34
+ return response.json()
35
+ except requests.RequestException as e:
36
+ return {"error": str(e)}
29
37
 
30
38
  def image(self, model_id: str, input: str, input_type: str):
31
- url = f"{self.base_url}embed/"
32
- data = {
33
- "modality": "image",
34
- "model_id": model_id,
35
- "input": input,
36
- "input_type": input_type
37
- }
38
- response = requests.post(url, json=data, headers=self.headers)
39
- return response.json()
39
+ try:
40
+ url = f"{self.base_url}embed/"
41
+ data = {
42
+ "modality": "image",
43
+ "model_id": model_id,
44
+ "input": input,
45
+ "input_type": input_type
46
+ }
47
+ response = requests.post(url, json=data, headers=self.headers)
48
+ response.raise_for_status()
49
+ return response.json()
50
+ except requests.RequestException as e:
51
+ return {"error": str(e)}
40
52
 
41
53
  def audio(self, model_id: str, input: str, input_type: str):
42
- url = f"{self.base_url}embed/"
43
- data = {
44
- "modality": "audio",
45
- "model_id": model_id,
46
- "input": input,
47
- "input_type": input_type
48
- }
49
- response = requests.post(url, json=data, headers=self.headers)
50
- return response.json()
54
+ try:
55
+ url = f"{self.base_url}embed/"
56
+ data = {
57
+ "modality": "audio",
58
+ "model_id": model_id,
59
+ "input": input,
60
+ "input_type": input_type
61
+ }
62
+ response = requests.post(url, json=data, headers=self.headers)
63
+ response.raise_for_status()
64
+ return response.json()
65
+ except requests.RequestException as e:
66
+ return {"error": str(e)}
@@ -0,0 +1,51 @@
1
+ import requests
2
+
3
class Index:
    """Client for the ``index/`` routes of the API.

    Every public method returns the decoded JSON body of the response, or a
    dict of the form ``{"error": "<message>"}`` when ``requests`` raises a
    ``RequestException`` (``raise_for_status`` is called on every response).
    """

    def __init__(self, base_url, headers):
        # base_url is concatenated directly with the route, so it is expected
        # to already end with a slash.
        self.base_url = base_url
        self.headers = headers

    @staticmethod
    def _multipart_headers(headers):
        """Return a copy of ``headers`` without any Content-Type entry.

        For ``files=`` uploads requests must generate the
        ``multipart/form-data; boundary=...`` header itself; an explicit
        Content-Type (the client passes ``application/json``) would be sent
        instead and the body could not be parsed as multipart.
        """
        return {
            key: value
            for key, value in (headers or {}).items()
            if key.lower() != "content-type"
        }

    @staticmethod
    def _form_field(value):
        """Serialise dict/list values to JSON text for use as a form field.

        requests form-encodes a dict passed inside ``data=`` by iterating it,
        which sends only the keys. Other values are returned unchanged.
        NOTE(review): assumes the server parses these form fields as JSON
        strings -- confirm against the API.
        """
        if isinstance(value, (dict, list)):
            import json  # local import: this module only imports requests

            return json.dumps(value)
        return value

    def upload(self, file, collection_id, metadata=None, settings=None):
        """Upload ``file`` (an open binary file object) to ``index/upload``.

        ``metadata`` and ``settings`` are sent as form fields next to the
        file; ``None`` fields are omitted by requests.
        """
        try:
            endpoint = f"{self.base_url}index/upload"
            files = {"file": file}
            data = {
                "collection_id": collection_id,
                "metadata": self._form_field(metadata),
                "settings": self._form_field(settings),
            }
            response = requests.post(
                endpoint,
                files=files,
                data=data,
                headers=self._multipart_headers(self.headers),
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e)}

    def url(self, url, collection_id, metadata=None, settings=None):
        """Ask the API to index the content found at ``url``.

        The request is a JSON POST to ``index/url``.
        """
        try:
            # Keep the endpoint in its own name: reusing ``url`` here would
            # replace the caller's URL in the payload with the endpoint.
            endpoint = f"{self.base_url}index/url"
            data = {
                "url": url,
                "collection_id": collection_id,
                "metadata": metadata,
                "settings": settings,
            }
            response = requests.post(endpoint, json=data, headers=self.headers)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e)}

    def youtube(self, youtube_video_id, collection_id, metadata=None, settings=None):
        """Ask the API to index a YouTube video via a JSON POST to ``index/youtube``."""
        try:
            endpoint = f"{self.base_url}index/youtube"
            data = {
                "youtube_video_id": youtube_video_id,
                "collection_id": collection_id,
                "metadata": metadata,
                "settings": settings,
            }
            response = requests.post(endpoint, json=data, headers=self.headers)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e)}
@@ -0,0 +1,60 @@
1
+ import requests
2
+
3
class Search:
    """Client for the ``search/`` routes of the API.

    Every public method returns the decoded JSON body of the response, or a
    dict of the form ``{"error": "<message>"}`` when ``requests`` raises a
    ``RequestException`` (``raise_for_status`` is called on every response).
    """

    def __init__(self, base_url, headers):
        # base_url is concatenated directly with the route, so it is expected
        # to already end with a slash.
        self.base_url = base_url
        self.headers = headers

    @staticmethod
    def _multipart_headers(headers):
        """Return a copy of ``headers`` without any Content-Type entry.

        For ``files=`` uploads requests must generate the
        ``multipart/form-data; boundary=...`` header itself; an explicit
        Content-Type (the client passes ``application/json``) would be sent
        instead and the body could not be parsed as multipart.
        """
        return {
            key: value
            for key, value in (headers or {}).items()
            if key.lower() != "content-type"
        }

    @staticmethod
    def _form_field(value):
        """Serialise dict/list values to JSON text for use as a form field.

        requests form-encodes a dict passed inside ``data=`` by iterating it,
        which sends only the keys. Other values are returned unchanged.
        """
        if isinstance(value, (dict, list)):
            import json  # local import: this module only imports requests

            return json.dumps(value)
        return value

    def text(self, input, modality, input_type="text", filters=None, group_by_file=True, page=1, page_size=10):
        """Run a search for ``input`` via a JSON POST to ``search/text``.

        ``page`` and ``page_size`` are nested under a ``"pagination"`` key and
        a missing ``filters`` is sent as an empty object.
        """
        try:
            endpoint = f"{self.base_url}search/text"
            data = {
                "input": input,
                "modality": modality,
                "input_type": input_type,
                "filters": filters or {},
                "group_by_file": group_by_file,
                "pagination": {
                    "page": page,
                    "page_size": page_size,
                },
            }
            response = requests.post(endpoint, json=data, headers=self.headers)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e)}

    def upload(self, file, filters=None, page=1, page_size=10):
        """Search using an uploaded ``file`` (an open binary file object).

        The file and options are sent as multipart form data to
        ``search/upload``; a missing ``filters`` is sent as the text ``{}``.
        """
        try:
            endpoint = f"{self.base_url}search/upload"
            files = {"file": file}
            data = {
                # The empty default is already the JSON text "{}", so a dict
                # is sent as JSON text too rather than as its bare keys.
                "filters": self._form_field(filters) if filters else "{}",
                "page": page,
                "page_size": page_size,
            }
            response = requests.post(
                endpoint,
                files=files,
                data=data,
                headers=self._multipart_headers(self.headers),
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e)}

    def url(self, url, input_type="file", filters=None, modality="text", page=1, page_size=10):
        """Search using the content found at ``url`` via a JSON POST to ``search/url``."""
        try:
            # Keep the endpoint in its own name: reusing ``url`` here would
            # replace the caller's URL in the payload with the endpoint.
            endpoint = f"{self.base_url}search/url"
            data = {
                "url": url,
                "input_type": input_type,
                "filters": filters or {},
                "modality": modality,
                "pagination": {
                    "page": page,
                    "page_size": page_size,
                },
            }
            response = requests.post(endpoint, json=data, headers=self.headers)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e)}
@@ -1,24 +1,106 @@
1
- import requests
2
-
1
+ import subprocess
2
+ import tempfile
3
+ import os
4
+ import base64
5
+ from urllib.parse import urlparse
6
+ from urllib.request import urlretrieve
7
+ from tqdm import tqdm
3
8
 
4
9
class Tools:
    """Namespace for helpers that run locally rather than calling the API."""

    class Video:
        """Video helpers, reachable as ``Tools().video``."""

        def __init__(self, parent):
            # The owning Tools instance is accepted but not stored.
            pass

        def process(self, video_source: str, chunk_interval: float, resolution: list):
            """Lazily yield one dict per chunk of ``video_source``.

            The chunking is delegated to ``VideoChunker``; each yielded dict
            has the keys ``"base64_chunk"``, ``"start_time"`` and
            ``"end_time"``. Being a generator, nothing runs until the first
            item is requested.
            """
            for piece in VideoChunker(video_source, chunk_interval, resolution):
                yield {
                    "base64_chunk": piece["base64"],
                    "start_time": piece["start_time"],
                    "end_time": piece["end_time"],
                }

    def __init__(self):
        self.video = self.Video(self)
27
+
28
class VideoChunker:
    """Iterate over a video as consecutive re-encoded, base64-encoded chunks.

    Each iteration step runs ``ffmpeg`` to cut and rescale the next
    ``chunk_interval`` seconds and returns a dict with the keys ``"base64"``,
    ``"start_time"`` and ``"end_time"``. The duration is read once with
    ``ffprobe`` on the first step; http(s) sources are downloaded to a
    temporary directory first.

    Args:
        video_source: Local path or http(s) URL of the video.
        chunk_interval: Length of each chunk in seconds; must be > 0.
        target_resolution: Two-item sequence interpolated as
            ``"{0}x{1}"`` into ffmpeg's ``scale=`` filter, i.e. the first
            item is used as the width and the second as the height.

    Raises:
        ValueError: If ``chunk_interval`` is not positive, or (on first
            iteration) if ffprobe does not report a numeric duration.
        RuntimeError: If ffmpeg fails to produce a chunk.
    """

    def __init__(self, video_source, chunk_interval, target_resolution):
        if chunk_interval <= 0:
            # A non-positive interval never advances current_time, so the
            # iterator would never reach the end of the video.
            raise ValueError("chunk_interval must be greater than 0")
        self.video_source = video_source
        self.chunk_interval = chunk_interval
        self.target_resolution = f"{target_resolution[0]}x{target_resolution[1]}"
        self.total_duration = None
        self.current_time = 0
        self.progress_bar = None
        # Created last so that a failure above does not leave a directory behind.
        self.temp_dir = tempfile.mkdtemp()

    def __del__(self):
        # Exceptions raised in a destructor cannot be handled by the caller
        # and the object may be only partially constructed, so stay silent.
        try:
            self.cleanup()
        except Exception:
            pass

    def cleanup(self):
        """Close the progress bar and delete the temporary directory.

        Safe to call more than once and on partially constructed instances.
        """
        self._close_progress_bar()
        temp_dir = getattr(self, "temp_dir", None)
        if not temp_dir or not os.path.isdir(temp_dir):
            return
        for name in os.listdir(temp_dir):
            try:
                os.remove(os.path.join(temp_dir, name))
            except OSError:
                # Best effort: one stubborn entry should not stop the rest.
                pass
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass

    def _close_progress_bar(self):
        """Close and forget the progress bar, if one was created."""
        bar = getattr(self, "progress_bar", None)
        if bar is not None:
            bar.close()
            self.progress_bar = None

    def __iter__(self):
        return self

    def __next__(self):
        if self.total_duration is None:
            self._initialize_video()

        chunk = self._process_chunk()
        if chunk is None:
            raise StopIteration
        return chunk

    def _initialize_video(self):
        """Download the source if it is remote, then read its duration."""
        if urlparse(self.video_source).scheme in ('http', 'https'):
            print("Downloading video...")
            temp_file = os.path.join(self.temp_dir, 'temp_video')
            urlretrieve(self.video_source, temp_file)
            self.video_source = temp_file

        # Get video duration
        result = subprocess.run(
            [
                'ffprobe', '-v', 'error',
                '-show_entries', 'format=duration',
                '-of', 'default=noprint_wrappers=1:nokey=1',
                self.video_source,
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        reported = result.stdout.decode('utf-8', errors='replace').strip()
        try:
            duration = float(reported)
        except ValueError:
            detail = result.stderr.decode('utf-8', errors='replace').strip()
            raise ValueError(
                f"Could not determine duration of {self.video_source!r}: "
                f"{detail or 'ffprobe reported no numeric duration'}"
            ) from None
        self.total_duration = duration

        print(f"Total video duration: {self.total_duration:.2f} seconds")
        self.progress_bar = tqdm(total=100, desc="Processing video", unit="%")

    def _process_chunk(self):
        """Encode the next chunk; return its dict, or None when finished."""
        if self.current_time >= self.total_duration:
            self._close_progress_bar()
            return None

        start_time = self.current_time
        end_time = min(start_time + self.chunk_interval, self.total_duration)

        # Generate chunk using FFmpeg
        temp_output = os.path.join(self.temp_dir, f"chunk_{self.current_time}.mp4")
        try:
            completed = subprocess.run(
                [
                    'ffmpeg', '-y', '-i', self.video_source,
                    '-ss', str(start_time), '-to', str(end_time),
                    '-vf', f'scale={self.target_resolution}',
                    '-c:v', 'libx264', '-preset', 'ultrafast',
                    temp_output,
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
            )
            if completed.returncode != 0 or not os.path.exists(temp_output):
                detail = completed.stderr.decode('utf-8', errors='replace').strip()
                raise RuntimeError(
                    f"ffmpeg failed for chunk {start_time}-{end_time}s: {detail[-500:]}"
                )

            # Convert to base64
            with open(temp_output, 'rb') as f:
                base64_string = base64.b64encode(f.read()).decode('utf-8')
        finally:
            # Remove the temporary file even when encoding or reading failed.
            if os.path.exists(temp_output):
                os.remove(temp_output)

        # Update progress
        if self.progress_bar is not None:
            progress = (end_time / self.total_duration) * 100
            self.progress_bar.n = int(progress)
            self.progress_bar.refresh()

        self.current_time = end_time

        return {
            "base64": base64_string,
            "start_time": start_time,
            "end_time": end_time,
        }
@@ -0,0 +1,324 @@
1
+ Metadata-Version: 2.1
2
+ Name: mixpeek
3
+ Version: 0.7.0
4
+ Summary: Mixpeek Python SDK
5
+ Home-page: https://github.com/mixpeek/mixpeek-python
6
+ Author: Ethan Steininger
7
+ Author-email: ethan@mixpeek.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.6
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: requests ==2.32.3
14
+ Requires-Dist: pydantic ==2.7.3
15
+ Requires-Dist: tqdm ==4.66.4
16
+ Requires-Dist: urllib3 ==2.2.1
17
+
18
+ # Mixpeek Python SDK
19
+
20
+ This SDK provides easy access to the Mixpeek API for Python developers.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install mixpeek
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ First, import and initialize the Mixpeek client:
31
+
32
+ ```python
33
+ from mixpeek import Mixpeek
34
+
35
+ client = Mixpeek(api_key="your_api_key_here")
36
+ ```
37
+
38
+ ### Embed
39
+
40
+ The `embed` module provides methods for embedding various types of data.
41
+
42
+ #### Video Embedding
43
+
44
+ ```python
45
+ response = client.embed.video(model_id="model_id", input="video_data", input_type="base64")
46
+ ```
47
+
48
+ - `model_id` (str): The ID of the model to use for embedding.
49
+ - `input` (str): The video data or URL.
50
+ - `input_type` (str): Either "base64" or "url".
51
+
52
+ #### Text Embedding
53
+
54
+ ```python
55
+ response = client.embed.text(model_id="model_id", input="text_to_embed", input_type="text")
56
+ ```
57
+
58
+ - `model_id` (str): The ID of the model to use for embedding.
59
+ - `input` (str): The text to embed.
60
+ - `input_type` (str): Should be "text".
61
+
62
+ #### Image Embedding
63
+
64
+ ```python
65
+ response = client.embed.image(model_id="model_id", input="image_data", input_type="base64")
66
+ ```
67
+
68
+ - `model_id` (str): The ID of the model to use for embedding.
69
+ - `input` (str): The image data or URL.
70
+ - `input_type` (str): Either "base64" or "url".
71
+
72
+ #### Audio Embedding
73
+
74
+ ```python
75
+ response = client.embed.audio(model_id="model_id", input="audio_data", input_type="base64")
76
+ ```
77
+
78
+ - `model_id` (str): The ID of the model to use for embedding.
79
+ - `input` (str): The audio data or URL.
80
+ - `input_type` (str): Either "base64" or "url".
81
+
82
+ ### Index
83
+
84
+ The `index` module provides methods for indexing various types of content.
85
+
86
+ #### Upload File
87
+
88
+ ```python
89
+ with open("file.txt", "rb") as file:
90
+ response = client.index.upload(file=file, collection_id="collection_id", metadata={"key": "value"}, settings={"video": {"transcribe": True}})
91
+ ```
92
+
93
+ - `file` (file object): The file to upload.
94
+ - `collection_id` (str): The ID of the collection to add the file to.
95
+ - `metadata` (dict, optional): Additional metadata for the file.
96
+ - `settings` (dict, optional): Processing settings for the file.
97
+
98
+ #### Index URL
99
+
100
+ ```python
101
+ response = client.index.url(url="https://example.com", collection_id="collection_id", metadata={"key": "value"}, settings={"image": {"caption_model_id": "model_id"}})
102
+ ```
103
+
104
+ - `url` (str): The URL to index.
105
+ - `collection_id` (str): The ID of the collection to add the content to.
106
+ - `metadata` (dict, optional): Additional metadata for the content.
107
+ - `settings` (dict, optional): Processing settings for the content.
108
+
109
+ #### Index YouTube Video
110
+
111
+ ```python
112
+ response = client.index.youtube(youtube_video_id="video_id", collection_id="collection_id", metadata={"key": "value"}, settings={"video": {"transcribe": True}})
113
+ ```
114
+
115
+ - `youtube_video_id` (str): The ID of the YouTube video to index.
116
+ - `collection_id` (str): The ID of the collection to add the video to.
117
+ - `metadata` (dict, optional): Additional metadata for the video.
118
+ - `settings` (dict, optional): Processing settings for the video.
119
+
120
+ #### Search YouTube
121
+
122
+ ```python
123
+ response = client.index.youtube_search(query="search query", collection_id="collection_id", max_results=20, metadata={"key": "value"}, shorts_only=False)
124
+ ```
125
+
126
+ - `query` (str): The search query for YouTube videos.
127
+ - `collection_id` (str): The ID of the collection to add the search results to.
128
+ - `max_results` (int, optional): Maximum number of results to return (default: 10, max: 500).
129
+ - `metadata` (dict, optional): Additional metadata for the search results.
130
+ - `shorts_only` (bool, optional): Whether to search for YouTube Shorts only (default: False).
131
+
132
+ ### Search
133
+
134
+ The `search` module provides methods for searching indexed content.
135
+
136
+ #### Text Search
137
+
138
+ ```python
139
+ response = client.search.text(input="search query", modality="text", input_type="text", filters={"key": "value"}, group_by_file=True, page=1, page_size=10)
140
+ ```
141
+
142
+ - `input` (str): The search query or input data.
143
+ - `modality` (str): The modality of the search (e.g., "text", "image", "video", "audio").
144
+ - `input_type` (str, optional): The type of input (default: "text").
145
+ - `filters` (dict, optional): Additional filters for the search.
146
+ - `group_by_file` (bool, optional): Whether to group results by file (default: True).
147
+ - `page` (int, optional): The page number for pagination (default: 1).
148
+ - `page_size` (int, optional): The number of results per page (default: 10).
149
+
150
+ #### Upload Search
151
+
152
+ ```python
153
+ with open("query_image.jpg", "rb") as file:
154
+ response = client.search.upload(file=file, filters={"key": "value"}, page=1, page_size=10)
155
+ ```
156
+
157
+ - `file` (file object): The file to use as a search query.
158
+ - `filters` (dict, optional): Additional filters for the search.
159
+ - `page` (int, optional): The page number for pagination (default: 1).
160
+ - `page_size` (int, optional): The number of results per page (default: 10).
161
+
162
+ #### URL Search
163
+
164
+ ```python
165
+ response = client.search.url(url="https://example.com/image.jpg", input_type="file", filters={"key": "value"}, modality="image", page=1, page_size=10)
166
+ ```
167
+
168
+ - `url` (str): The URL of the file to use as a search query.
169
+ - `input_type` (str, optional): The type of input (default: "file").
170
+ - `filters` (dict, optional): Additional filters for the search.
171
+ - `modality` (str, optional): The modality of the search (default: "text").
172
+ - `page` (int, optional): The page number for pagination (default: 1).
173
+ - `page_size` (int, optional): The number of results per page (default: 10).
174
+
175
+ ### Collections
176
+
177
+ The `collections` module provides methods for managing collections and files.
178
+
179
+ #### List Files
180
+
181
+ ```python
182
+ response = client.collections.list_files(collection_id="collection_id", randomize=False, page=1, page_size=10, filters={"key": "value"}, sort_by="created_at", sort_order="desc")
183
+ ```
184
+
185
+ - `collection_id` (str): The ID of the collection to list files from.
186
+ - `randomize` (bool, optional): Whether to randomize the results (default: False).
187
+ - `page` (int, optional): The page number for pagination (default: 1).
188
+ - `page_size` (int, optional): The number of results per page (default: 10).
189
+ - `filters` (dict, optional): Additional filters for the file list.
190
+ - `sort_by` (str, optional): The field to sort by.
191
+ - `sort_order` (str, optional): The sort order, either "asc" or "desc" (default: "asc").
192
+
193
+ #### List Collections
194
+
195
+ ```python
196
+ response = client.collections.list_collections()
197
+ ```
198
+
199
+ #### Search Files
200
+
201
+ ```python
202
+ response = client.collections.search_files(query="search query", collection_id="collection_id", page=1, page_size=10, sort_by="relevance", sort_order="desc")
203
+ ```
204
+
205
+ - `query` (str): The search query for files within the collection.
206
+ - `collection_id` (str): The ID of the collection to search in.
207
+ - `page` (int, optional): The page number for pagination (default: 1).
208
+ - `page_size` (int, optional): The number of results per page (default: 10).
209
+ - `sort_by` (str, optional): The field to sort by.
210
+ - `sort_order` (str, optional): The sort order, either "asc" or "desc" (default: "asc").
211
+
212
+ #### Get File by ID
213
+
214
+ ```python
215
+ response = client.collections.get_file_by_id(file_id="file_id")
216
+ ```
217
+
218
+ - `file_id` (str): The ID of the file to retrieve.
219
+
220
+ #### Delete File by ID
221
+
222
+ ```python
223
+ response = client.collections.delete_file_by_id(file_id="file_id")
224
+ ```
225
+
226
+ - `file_id` (str): The ID of the file to delete.
227
+
228
+ #### Delete Collection
229
+
230
+ ```python
231
+ response = client.collections.delete_collection(collection_id="collection_id")
232
+ ```
233
+
234
+ - `collection_id` (str): The ID of the collection to delete.
235
+
236
+
237
+
238
+ ### Tools
239
+
240
+ The `tools` module provides utility functions for processing various types of data before embedding or indexing.
241
+
242
+ #### Video Processing
243
+
244
+ The `video` tool allows you to process video files or URLs into chunks for easier embedding.
245
+
246
+ ```python
247
+ from mixpeek import Mixpeek
248
+
249
+ mixpeek = Mixpeek('your_api_key_here')
250
+
251
+ video_url = "https://example.com/video.mp4"
252
+
253
+ # Process video chunks, this runs locally
254
+ processed_chunks = mixpeek.tools.video.process(
255
+ video_source=video_url,
256
+ chunk_interval=1,
257
+ resolution=[720, 1280]
258
+ )
259
+
260
+ # Embed each chunk
261
+ results = []
262
+ for index, chunk in enumerate(processed_chunks):
263
+ print(f"Processing video chunk: {index}")
264
+
265
+ embedding = mixpeek.embed.video(
266
+ model_id="vuse-generic-v1",
267
+ input=chunk['base64_chunk'],
268
+ input_type="base64"
269
+ )['embedding']
270
+
271
+ result = {
272
+ "start_time": chunk["start_time"],
273
+ "end_time": chunk["end_time"],
274
+ "embedding": embedding
275
+ }
276
+ results.append(result)
277
+ print(f" Embedding preview: {embedding[:5] + ['...'] + embedding[-5:]}")
278
+ print("Insert into DB here")
279
+
280
+ print(f"Processed {len(results)} chunks")
281
+ ```
282
+
283
+ Parameters for `mixpeek.tools.video.process`:
284
+
285
+ - `video_source` (str): URL or file path of the video to process.
286
+ - `chunk_interval` (float): Duration of each video chunk in seconds.
287
+ - `resolution` (list): Desired resolution of the video chunks as [width, height].
288
+
289
+ The `process` method returns a generator that yields one dictionary per chunk, each containing:
290
+
291
+ - `start_time` (float): Start time of the chunk in seconds.
292
+ - `end_time` (float): End time of the chunk in seconds.
293
+ - `base64_chunk` (str): Base64-encoded video chunk.
294
+
295
+ This tool is particularly useful when you need to embed long videos, as it allows you to process the video in smaller chunks and embed each chunk separately.
296
+
297
+ ## Response Format
298
+
299
+ All methods return a JSON response. In case of an error, the response will contain an "error" key with a description of the error.
300
+
301
+ ## Error Handling
302
+
303
+ The SDK handles HTTP errors and returns them in the response. You should always check for the presence of an "error" key in the response before processing the results.
304
+
305
+ ```python
306
+ response = client.embed.text(model_id="model_id", input="text_to_embed", input_type="text")
307
+ if "error" in response:
308
+ print(f"An error occurred: {response['error']}")
309
+ else:
310
+ # Process the successful response
311
+ print(response)
312
+ ```
313
+
314
+ ## Rate Limiting
315
+
316
+ The Mixpeek API may have rate limits. If you encounter rate limiting errors, you should implement appropriate backoff and retry logic in your application.
317
+
318
+ ## Support
319
+
320
+ For any issues or questions, please contact Mixpeek support or refer to the official API documentation.
321
+
322
+ ```
323
+
324
+ ```
@@ -0,0 +1,13 @@
1
+ mixpeek/__init__.py,sha256=XDdcK7wTEOEcF1cp-GeWmgPJ21Ny1R9pB0PPNrdDTMo,28
2
+ mixpeek/client.py,sha256=JFQO-hLFe7MESdkNoweuoaGrc4Vnqtl55_JRvgDCspg,804
3
+ mixpeek/exceptions.py,sha256=Orhdo5UFLn3fcWVJtlgkznW8Iy5ndL96h0qTY8zOlDA,235
4
+ mixpeek/endpoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ mixpeek/endpoints/collections.py,sha256=ffOBenNHG-mVpjT7kQcOfn78-wjgjdQTVyrloyeAy90,2890
6
+ mixpeek/endpoints/embed.py,sha256=ybDqyIt0oJiwIdH1QSwuV-CPeJgd-zW0etNAcBdgZYE,2290
7
+ mixpeek/endpoints/index.py,sha256=2WcjrRHCMqqj0ddWEkl1f5U81Ni9xKySC1_sdkrhnhM,1857
8
+ mixpeek/endpoints/search.py,sha256=fp78miF5bSWTiQn32NMoth9mlLBsj7NfjNzS6HrEz3Q,2170
9
+ mixpeek/endpoints/tools.py,sha256=QH3X0tYKr-W_9ApENgP7PeuLJi7Qr9W2jrrgQFH4F2A,3567
10
+ mixpeek-0.7.0.dist-info/METADATA,sha256=kmaWlwIRerVhl4H_6cZHyvE8Kek6S4uNy5V8YDkDqhk,10754
11
+ mixpeek-0.7.0.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
12
+ mixpeek-0.7.0.dist-info/top_level.txt,sha256=EJ8Jc4IhqyUwnUlBwKbs498Ju4O9a-IDh2kXc_lo6Vg,8
13
+ mixpeek-0.7.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.1.0)
2
+ Generator: setuptools (73.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,71 +0,0 @@
1
- import requests
2
- import os
3
-
4
- class Connections:
5
- def __init__(self, base_url, headers, api_key):
6
- self.base_url = base_url
7
- self.headers = headers
8
- self.api_key = api_key
9
- self.data = self.Data(self)
10
- self.storage = self.Storage(self)
11
-
12
- def create(self, alias: str, engine: str, details: dict):
13
- url = f"{self.base_url}connections/"
14
- data = {
15
- "alias": alias,
16
- "engine": engine,
17
- "details": details
18
- }
19
- response = requests.post(url, json=data, headers=self.headers)
20
- return response.json()
21
-
22
- class Data:
23
- def __init__(self, parent):
24
- self.base_url = parent.base_url
25
- self.headers = parent.headers
26
-
27
- # mixpeek.connections.data.insert(connection_id="123", payload={"key": "value"})
28
- def insert(self, connection_id: str, payload: list):
29
- pass
30
-
31
- # mixpeek.connections.data.delete(connection_id="123", filters={"key": "value"})
32
- def delete(self, connection_id: str, filters: dict):
33
- pass
34
-
35
- # mixpeek.connections.data.upsert(connection_id="123", payload={"key": "value"}, filters={"key": "value"})
36
- def upsert(self, connection_id: str, payload: dict, filters: dict):
37
- pass
38
-
39
-
40
- class Storage:
41
- def __init__(self, parent):
42
- self.base_url = parent.base_url
43
- self.headers = parent.headers
44
- self.api_key = parent.api_key
45
-
46
- def upload(self, connection_id: str, file_path: str, prefix: str = None):
47
- url = f"{self.base_url}connections/storage?connection_id={connection_id}"
48
- if prefix:
49
- url += f"&prefix={prefix}"
50
-
51
- files=[
52
- ('file',(os.path.basename(file_path),open(file_path,'rb'),'application/octet-stream'))
53
- ]
54
- headers = {
55
- 'Authorization': f'Bearer {self.api_key}'
56
- }
57
-
58
- response = requests.request("POST", url, headers=headers, files=files)
59
-
60
- return response.json()
61
-
62
-
63
- # mixpeek.connections.storage.delete(connection_id="123", file_name="example.txt")
64
- def delete(self, connection_id: str, file_name: str):
65
- url = f"{self.base_url}storage/{connection_id}/delete/{file_name}"
66
- response = requests.delete(url, headers=self.headers)
67
- return response.json()
68
-
69
-
70
-
71
-
@@ -1,55 +0,0 @@
1
- import requests
2
-
3
- class Extract:
4
- def __init__(self, base_url, headers):
5
- self.base_url = base_url
6
- self.headers = headers
7
-
8
- def automatic(self, input: str, input_type: str):
9
- url = f"{self.base_url}extract/"
10
- data = {
11
- "input": input,
12
- "input_type": input_type
13
- }
14
- response = requests.post(url, json=data, headers=self.headers)
15
- return response.json()
16
-
17
- def video(self, input: str, input_type: str):
18
- url = f"{self.base_url}extract/"
19
- data = {
20
- "modality": "video",
21
- "input": input,
22
- "input_type": input_type
23
- }
24
- response = requests.post(url, json=data, headers=self.headers)
25
- return response.json()
26
-
27
- def audio(self, input: str, input_type: str):
28
- url = f"{self.base_url}extract/"
29
- data = {
30
- "modality": "audio",
31
- "input": input,
32
- "input_type": input_type
33
- }
34
- response = requests.post(url, json=data, headers=self.headers)
35
- return response.json()
36
-
37
- def image(self, input: str, input_type: str):
38
- url = f"{self.base_url}extract/"
39
- data = {
40
- "modality": "image",
41
- "input": input,
42
- "input_type": input_type
43
- }
44
- response = requests.post(url, json=data, headers=self.headers)
45
- return response.json()
46
-
47
- def text(self, input: str, input_type: str):
48
- url = f"{self.base_url}extract/"
49
- data = {
50
- "modality": "text",
51
- "input": input,
52
- "input_type": input_type
53
- }
54
- response = requests.post(url, json=data, headers=self.headers)
55
- return response.json()
@@ -1,17 +0,0 @@
1
- import requests
2
- from pydantic import BaseModel
3
-
4
- class Generate:
5
- def __init__(self, base_url, headers):
6
- self.base_url = base_url
7
- self.headers = headers
8
-
9
- def text(self, model_id: str, response_format: BaseModel, context: str):
10
- url = f"{self.base_url}generate/text"
11
- data = {
12
- "model_id": model_id,
13
- "response_format": response_format.schema_json(), # Ensure correct method to get JSON schema
14
- "context": context
15
- }
16
- response = requests.post(url, json=data, headers=self.headers)
17
- return response.json()
@@ -1,37 +0,0 @@
1
- import requests
2
-
3
- class Pipelines:
4
- def __init__(self, base_url, headers):
5
- self.base_url = base_url
6
- self.headers = headers
7
-
8
- # mixpeek.pipelines.enable
9
- def enable(self, pipeline_id: str, enable: bool):
10
- url = f"{self.base_url}pipelines/{pipeline_id}/enable"
11
- data = {
12
- "enable": enable
13
- }
14
- response = requests.patch(url, json=data, headers=self.headers)
15
- return response.json()
16
-
17
- # mixpeek.pipelines.create
18
- def create(self, alias: str, code: str, destination: dict, source: dict):
19
- url = f"{self.base_url}pipelines/"
20
- data = {
21
- "alias": alias,
22
- "code": code,
23
- "destination": destination,
24
- "source": source
25
- }
26
- response = requests.post(url, json=data, headers=self.headers)
27
- return response.json()
28
-
29
- # mixpeek.pipelines.invoke
30
- def invoke(self, pipeline_id: str, payload: dict, options: dict):
31
- url = f"{self.base_url}pipelines/invoke/{pipeline_id}"
32
- data = {
33
- "payload": payload,
34
- "options": options
35
- }
36
- response = requests.post(url, json=data, headers=self.headers)
37
- return response.json()
@@ -1,122 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mixpeek
3
- Version: 0.6.29
4
- Summary: Mixpeek Python SDK
5
- Home-page: https://github.com/mixpeek/mixpeek-python
6
- Author: Ethan Steininger
7
- Author-email: ethan@mixpeek.com
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.6
12
- Description-Content-Type: text/markdown
13
- Requires-Dist: requests ==2.32.3
14
- Requires-Dist: pydantic ==2.7.3
15
-
16
- # Mixpeek SDK
17
-
18
- A Python SDK for the Mixpeek API.
19
-
20
- ## Installation
21
-
22
- ```bash
23
- pip install mixpeek
24
- ```
25
-
26
- ## Usage
27
-
28
- ```python
29
- from mixpeek import Mixpeek
30
- from pydantic import BaseModel
31
-
32
- mixpeek = Mixpeek("YOUR_API_KEY")
33
-
34
- ```
35
-
36
- ### Extract
37
-
38
- ```python
39
- extraction = mixpeek.extract.text(
40
- input="s3://document.pdf",
41
- input_type="url"
42
- )
43
- ```
44
-
45
- ### Embed
46
-
47
- ```python
48
- embedding = mixpeek.embed.video(
49
- model_id="mixpeek/vuse-generic-v1",
50
- input="s3://waving_boy.mp4",
51
- input_type="url"
52
- )
53
- ```
54
-
55
- ### Generate
56
-
57
- ```python
58
- class ResponseFormat(BaseModel):
59
- city: int
60
- weather: float
61
-
62
- generated_content = mixpeek.generate.text(
63
- model_id="openai/gpt-4-turbo",
64
- response_format=ResponseFormat,
65
- context="Please tell me the weather and make sure to respond in the provided JSON schema"
66
- )
67
- ```
68
-
69
- ### Connections
70
-
71
- Create connection
72
-
73
- ```python
74
- mixpeek.connections.create(
75
- alias="my-mongo-test",
76
- engine="mongodb",
77
- details={
78
- "host": "your_host_address",
79
- "database": "your_database_name",
80
- "username": "your_username",
81
- "password": "your_password"
82
- }
83
- )
84
- ```
85
-
86
- Insert data
87
-
88
- ```python
89
- mixpeek.connections.data.insert(
90
- connection_id="conn_123",
91
- payload={}
92
- )
93
- ```
94
-
95
- Delete data
96
-
97
- ```python
98
- mixpeek.connections.data.delete(
99
- connection_id="conn_321",
100
- filters={}
101
- )
102
- ```
103
-
104
- Upload file
105
-
106
- ```python
107
- mixpeek.connections.storage.upload(
108
- connection_id="conn_321",
109
- file_path="/my/local/file.mp4"
110
- )
111
- ```
112
-
113
- ### Tools
114
-
115
- ```python
116
- response = mixpeek.tools.video.process(
117
- url="https://s3/video.mp4",
118
- frame_interval=5,
119
- resolution=[720, 1280],
120
- return_base64=True
121
- )
122
- ```
@@ -1,14 +0,0 @@
1
- mixpeek/__init__.py,sha256=XDdcK7wTEOEcF1cp-GeWmgPJ21Ny1R9pB0PPNrdDTMo,28
2
- mixpeek/client.py,sha256=ifN-rCXtCbitTj6coUquQjpHgd98V3EVwORzNo8tIMY,992
3
- mixpeek/exceptions.py,sha256=Orhdo5UFLn3fcWVJtlgkznW8Iy5ndL96h0qTY8zOlDA,235
4
- mixpeek/endpoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- mixpeek/endpoints/connections.py,sha256=S9_328Kk3dXO4icyKPBTlAd7M83lmD4hG2fFtKjfAwQ,2430
6
- mixpeek/endpoints/embed.py,sha256=8ds_FinxZRW-ZQyv6LjDAX6Zoek2Cv4OYhwIgSBqwTs,1598
7
- mixpeek/endpoints/extract.py,sha256=PRY1ZjPPwJ3xEKNKVdWIm_WZeRNc68oJKcwVXNkv-S8,1706
8
- mixpeek/endpoints/generate.py,sha256=SFjVYfgeuIt4wO0I5ItnB4TEHhRkLgZOvQfWlEioye8,594
9
- mixpeek/endpoints/pipelines.py,sha256=X2mRsWgOx6FgWbInrAdjLQJbjXjorNqAo9mjFawLpoE,1210
10
- mixpeek/endpoints/tools.py,sha256=Ni9AYm2-jpDS1SLUohNLTOkXDUxLPnNLpTlhWJLfofs,791
11
- mixpeek-0.6.29.dist-info/METADATA,sha256=nZ9ZirutvyKfp5gg0C_fdUDAOuNbUO09k_TPLg68zaw,2083
12
- mixpeek-0.6.29.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
13
- mixpeek-0.6.29.dist-info/top_level.txt,sha256=EJ8Jc4IhqyUwnUlBwKbs498Ju4O9a-IDh2kXc_lo6Vg,8
14
- mixpeek-0.6.29.dist-info/RECORD,,