aavaaz 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. aavaaz/__init__.py +3 -0
  2. aavaaz/api/__init__.py +1 -0
  3. aavaaz/api/auth.py +72 -0
  4. aavaaz/api/dynamo_store.py +237 -0
  5. aavaaz/api/metrics.py +41 -0
  6. aavaaz/api/saas.py +405 -0
  7. aavaaz/api/search.py +47 -0
  8. aavaaz/api/storage.py +108 -0
  9. aavaaz/api/webhooks.py +70 -0
  10. aavaaz/cli.py +174 -0
  11. aavaaz/features/__init__.py +6 -0
  12. aavaaz/features/acl.py +312 -0
  13. aavaaz/features/audio_intelligence.py +681 -0
  14. aavaaz/features/batch_inference.py +418 -0
  15. aavaaz/features/diarization.py +211 -0
  16. aavaaz/features/ensemble.py +239 -0
  17. aavaaz/features/formatting.py +349 -0
  18. aavaaz/features/metrics.py +121 -0
  19. aavaaz/features/model_cache.py +113 -0
  20. aavaaz/features/multichannel.py +84 -0
  21. aavaaz/features/noise_reduction.py +118 -0
  22. aavaaz/features/pii_redaction.py +79 -0
  23. aavaaz/features/plugins.py +138 -0
  24. aavaaz/features/profanity_filter.py +103 -0
  25. aavaaz/features/search.py +267 -0
  26. aavaaz/features/storage.py +193 -0
  27. aavaaz/features/translation_relay.py +208 -0
  28. aavaaz/features/utterance.py +331 -0
  29. aavaaz/features/webhook.py +88 -0
  30. aavaaz/plugins/__init__.py +5 -0
  31. aavaaz/plugins/builtins.py +90 -0
  32. aavaaz/saas_server.py +75 -0
  33. aavaaz/sdks/python/__init__.py +237 -0
  34. aavaaz/server.py +99 -0
  35. aavaaz/serverless/__init__.py +1 -0
  36. aavaaz/serverless/lambda_handler.py +827 -0
  37. aavaaz/serverless/saas_lambda.py +342 -0
  38. aavaaz/transcribe.py +73 -0
  39. aavaaz/web/__init__.py +1 -0
  40. aavaaz-0.9.0.dist-info/METADATA +384 -0
  41. aavaaz-0.9.0.dist-info/RECORD +45 -0
  42. aavaaz-0.9.0.dist-info/WHEEL +5 -0
  43. aavaaz-0.9.0.dist-info/entry_points.txt +2 -0
  44. aavaaz-0.9.0.dist-info/licenses/LICENSE +373 -0
  45. aavaaz-0.9.0.dist-info/top_level.txt +1 -0
aavaaz/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Aavaaz — production-grade speech-to-text platform."""
2
+
3
+ __version__ = "0.9.0"
aavaaz/api/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Aavaaz extended API modules."""
aavaaz/api/auth.py ADDED
@@ -0,0 +1,72 @@
1
+ """
2
+ JWT-based authentication and API key access control for Aavaaz REST API.
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import time
8
+
9
+ import jwt
10
+ from fastapi import HTTPException, Request, Security
11
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
12
+
13
# Module-level logger for the authentication helpers.
logger = logging.getLogger(__name__)

# Bearer-token scheme used as a FastAPI dependency by require_auth().
# auto_error=False is passed here, and require_auth() itself handles the case
# where the resulting credentials are None (after trying the X-API-Key header).
_security = HTTPBearer(auto_error=False)

# JWT signing secret, read once at import time from AAVAAZ_JWT_SECRET.
# The fallback is an empty string, which create_token() and verify_token()
# treat as "not configured" and reject with ValueError. It can also be set
# after import through configure_auth().
_JWT_SECRET = os.environ.get("AAVAAZ_JWT_SECRET", "")
# Algorithm used for signing, and the only one accepted when decoding.
_JWT_ALGORITHM = "HS256"
# Accepted API keys; empty until configure_auth() supplies some.
_API_KEYS: set[str] = set()
21
+
22
+
23
+ def configure_auth(jwt_secret: str, api_keys: list[str] | None = None):
24
+ """Configure authentication settings."""
25
+ global _JWT_SECRET, _API_KEYS
26
+ _JWT_SECRET = jwt_secret
27
+ if api_keys:
28
+ _API_KEYS = set(api_keys)
29
+
30
+
31
def create_token(subject: str, expires_in: int = 3600, **claims) -> str:
    """Create a signed JWT for ``subject``.

    Args:
        subject: Value stored in the ``sub`` claim.
        expires_in: Lifetime in seconds; added to the issue time to form
            the ``exp`` claim.
        **claims: Extra claims merged into the payload. They are applied
            last, so a caller-supplied ``sub``/``iat``/``exp`` overrides the
            value computed here.

    Returns:
        The encoded token produced by ``jwt.encode``.

    Raises:
        ValueError: If no JWT secret has been configured.
    """
    if not _JWT_SECRET:
        raise ValueError("JWT secret not configured — set AAVAAZ_JWT_SECRET")
    # Read the clock once so that exp - iat is always exactly expires_in,
    # even when the call straddles a second boundary.
    issued_at = int(time.time())
    payload = {
        "sub": subject,
        "iat": issued_at,
        "exp": issued_at + expires_in,
        **claims,
    }
    return jwt.encode(payload, _JWT_SECRET, algorithm=_JWT_ALGORITHM)
42
+
43
+
44
def verify_token(token: str) -> dict:
    """Decode ``token`` with the configured secret and return its claims.

    Only the module's configured algorithm is accepted. Errors raised by
    ``jwt.decode`` propagate to the caller unchanged.

    Raises:
        ValueError: If no JWT secret has been configured.
    """
    if _JWT_SECRET:
        return jwt.decode(token, _JWT_SECRET, algorithms=[_JWT_ALGORITHM])
    raise ValueError("JWT secret not configured")
49
+
50
+
51
async def require_auth(
    request: Request,
    credentials: HTTPAuthorizationCredentials | None = Security(_security),
) -> dict:
    """FastAPI dependency that requires valid authentication.

    Supports both JWT bearer tokens and API keys (via X-API-Key header).

    Returns:
        ``{"sub": "api_key", "key": <key>}`` when a configured API key is
        presented, otherwise the claims returned by ``verify_token``.

    Raises:
        HTTPException: 401 when no usable credentials are supplied, or when
            the bearer token is expired or otherwise invalid.
    """
    # Check the API key header first. An unknown key is not rejected here;
    # the request simply falls through to the bearer-token path.
    api_key = request.headers.get("X-API-Key")
    if api_key and api_key in _API_KEYS:
        return {"sub": "api_key", "key": api_key}

    if credentials is None:
        raise HTTPException(status_code=401, detail="Authentication required")

    try:
        return verify_token(credentials.credentials)
    # The expired case must be listed first: in PyJWT it derives from
    # InvalidTokenError, which would otherwise catch it.
    except jwt.ExpiredSignatureError as exc:
        # Chain the cause so the original PyJWT error stays in tracebacks.
        raise HTTPException(status_code=401, detail="Token expired") from exc
    except jwt.InvalidTokenError as exc:
        raise HTTPException(status_code=401, detail="Invalid token") from exc
aavaaz/api/dynamo_store.py ADDED
@@ -0,0 +1,237 @@
1
+ """
2
+ DynamoDB-backed data store for Aavaaz SaaS.
3
+
4
+ Drop-in replacement for the in-memory store in api/saas.py.
5
+ Uses AWS DynamoDB for persistence — fits within free tier for early usage.
6
+
7
+ Tables:
8
+ - aavaaz-api-keys-{env}: API key storage (GSI on key_hash for auth lookups)
9
+ - aavaaz-usage-{env}: Daily usage records per user
10
+ - aavaaz-subscriptions-{env}: User subscription state
11
+ - aavaaz-transcripts-{env}: Transcript job history
12
+ """
13
+
14
+ import contextlib
15
+ import hashlib
16
+ import logging
17
+ import os
18
+ import secrets
19
+ import uuid
20
+ from datetime import UTC, datetime
21
+
22
+ import boto3
23
+ from boto3.dynamodb.conditions import Key
24
+
25
# Module-level logger for the DynamoDB store.
logger = logging.getLogger(__name__)

# Environment suffix used in the table names below (defaults to "prod").
ENV = os.environ.get("AAVAAZ_ENVIRONMENT", "prod")
# AWS region passed to the DynamoDB resource (defaults to "us-east-1").
REGION = os.environ.get("AWS_REGION", "us-east-1")

# NOTE: the boto3 resource and the Table handles are created at import time,
# so ENV and REGION are fixed by whatever the environment holds when this
# module is first imported.
_dynamodb = boto3.resource("dynamodb", region_name=REGION)

# One handle per table; names follow the "aavaaz-<purpose>-<env>" pattern.
_table_api_keys = _dynamodb.Table(f"aavaaz-api-keys-{ENV}")
_table_usage = _dynamodb.Table(f"aavaaz-usage-{ENV}")
_table_subscriptions = _dynamodb.Table(f"aavaaz-subscriptions-{ENV}")
_table_transcripts = _dynamodb.Table(f"aavaaz-transcripts-{ENV}")
36
+
37
+
38
+ # ─── API Keys ────────────────────────────────────────────────────────────────
39
+
40
+
41
def create_api_key(user_id: str, name: str) -> tuple[dict, str]:
    """Mint an API key for ``user_id`` and persist its hashed form.

    Only the SHA-256 hex digest and the first 12 characters of the secret are
    written to the API-keys table; the raw secret itself is returned to the
    caller and is not stored.

    Returns:
        ``(metadata, raw_secret)`` where ``metadata`` holds the public fields
        of the new key (id, name, prefix and timestamps).
    """
    secret = f"aavaaz_{secrets.token_urlsafe(32)}"
    digest = hashlib.sha256(secret.encode()).hexdigest()
    new_id = str(uuid.uuid4())
    created = datetime.now(UTC).isoformat()
    visible_prefix = secret[:12]

    # Stored record: includes the owner and the hash used for auth lookups.
    record = {
        "user_id": user_id,
        "key_id": new_id,
        "name": name,
        "key_hash": digest,
        "prefix": visible_prefix,
        "created_at": created,
        "last_used": None,
        "expires_at": None,
    }
    _table_api_keys.put_item(Item=record)

    # Public view handed back to the caller: no owner and no hash.
    public_view = {
        "id": new_id,
        "name": name,
        "prefix": visible_prefix,
        "created_at": created,
        "last_used": None,
        "expires_at": None,
    }
    return public_view, secret
70
+
71
+
72
def list_api_keys(user_id: str) -> list[dict]:
    """Return the public metadata of the API keys stored for ``user_id``.

    Issues a single query on the ``user_id`` partition key and maps each
    returned item to its public fields; the key hash is never included.
    """

    def public_fields(record: dict) -> dict:
        # last_used / expires_at are read with .get(), so a record that
        # lacks them yields None instead of raising KeyError.
        return {
            "id": record["key_id"],
            "name": record["name"],
            "prefix": record["prefix"],
            "created_at": record["created_at"],
            "last_used": record.get("last_used"),
            "expires_at": record.get("expires_at"),
        }

    by_owner = Key("user_id").eq(user_id)
    result = _table_api_keys.query(KeyConditionExpression=by_owner)
    return [public_fields(record) for record in result.get("Items", [])]
86
+
87
+
88
def revoke_api_key(user_id: str, key_id: str) -> bool:
    """Delete the API key ``key_id`` owned by ``user_id``.

    The delete is conditional on the item existing, so a missing key is
    reported as ``False`` rather than silently succeeding.

    Returns:
        ``True`` if the key was deleted, ``False`` if the condition failed.
    """
    primary_key = {"user_id": user_id, "key_id": key_id}
    try:
        _table_api_keys.delete_item(
            Key=primary_key,
            ConditionExpression="attribute_exists(user_id)",
        )
    except _dynamodb.meta.client.exceptions.ConditionalCheckFailedException:
        return False
    else:
        return True
98
+
99
+
100
def validate_api_key(raw_key: str) -> str | None:
    """Validate an API key and return the owning user_id, or None if invalid.

    The raw key is hashed with SHA-256 and looked up through the
    ``key-hash-index`` index; the first matching item decides the result.

    Args:
        raw_key: The secret presented by the client.

    Returns:
        The ``user_id`` of the matching key, or ``None`` when no item matches.
    """
    key_hash = hashlib.sha256(raw_key.encode()).hexdigest()

    response = _table_api_keys.query(
        IndexName="key-hash-index",
        KeyConditionExpression=Key("key_hash").eq(key_hash),
    )

    items = response.get("Items", [])
    if not items:
        return None

    item = items[0]
    # NOTE(review): key records carry an "expires_at" attribute, but it is not
    # checked here — confirm whether expiry is enforced elsewhere.

    # Best-effort update of the last_used timestamp. The call is synchronous;
    # a failure must not reject an otherwise valid key, but it is logged
    # instead of being silently discarded.
    try:
        _table_api_keys.update_item(
            Key={"user_id": item["user_id"], "key_id": item["key_id"]},
            UpdateExpression="SET last_used = :now",
            ExpressionAttributeValues={":now": datetime.now(UTC).isoformat()},
        )
    except Exception:
        logger.warning(
            "Failed to update last_used for API key %s",
            item.get("key_id"),
            exc_info=True,
        )

    return item["user_id"]
124
+
125
+
126
+ # ─── Usage Tracking ──────────────────────────────────────────────────────────
127
+
128
+
129
def record_usage(user_id: str, audio_minutes: float):
    """Record usage for the current UTC day with an atomic increment.

    Adds ``audio_minutes`` (rounded to 4 decimal places) to the day's
    ``audio_minutes`` counter and 1 to its ``requests`` counter, using a
    DynamoDB ``ADD`` update on the ``(user_id, date)`` item.

    Args:
        user_id: Owner of the usage record.
        audio_minutes: Minutes of audio processed by this request.
    """
    from decimal import Decimal

    today = datetime.now(UTC).strftime("%Y-%m-%d")

    # The boto3 DynamoDB resource refuses Python floats ("Float types are not
    # supported. Use Decimal types instead."). Converting through str keeps
    # the rounded decimal value instead of the float's binary expansion.
    minutes = Decimal(str(round(audio_minutes, 4)))

    _table_usage.update_item(
        Key={"user_id": user_id, "date": today},
        UpdateExpression="ADD audio_minutes :mins, requests :one",
        ExpressionAttributeValues={
            ":mins": minutes,
            ":one": 1,
        },
    )
141
+
142
+
143
def get_usage(user_id: str, days: int = 30) -> list[dict]:
    """Return the daily usage records of ``user_id`` from the last ``days`` days.

    Records are selected by date string: every item whose ``date`` is on or
    after (today in UTC minus ``days``) is returned, oldest first. Stored
    counters are converted to ``float`` / ``int``; missing ones become 0.
    """
    from datetime import timedelta

    window_start = datetime.now(UTC) - timedelta(days=days)
    start_date = window_start.strftime("%Y-%m-%d")

    in_window = Key("user_id").eq(user_id) & Key("date").gte(start_date)
    response = _table_usage.query(
        KeyConditionExpression=in_window,
        ScanIndexForward=True,
    )

    def as_record(row: dict) -> dict:
        return {
            "date": row["date"],
            "audio_minutes": float(row.get("audio_minutes", 0)),
            "requests": int(row.get("requests", 0)),
        }

    return [as_record(row) for row in response.get("Items", [])]
162
+
163
+
164
+ # ─── Subscriptions ───────────────────────────────────────────────────────────
165
+
166
+
167
def get_subscription(user_id: str) -> dict:
    """Return the subscription record of ``user_id``.

    When the subscriptions table has no item for the user, a default
    active "free" plan record is returned instead; nothing is written.
    """
    stored = _table_subscriptions.get_item(Key={"user_id": user_id}).get("Item")
    if stored:
        return stored

    # No stored subscription: fall back to the free-plan defaults.
    return {
        "user_id": user_id,
        "plan": "free",
        "status": "active",
        "stripe_customer_id": "",
        "stripe_subscription_id": "",
        "current_period_end": "",
        "cancel_at_period_end": False,
    }
184
+
185
+
186
def update_subscription(user_id: str, updates: dict):
    """Set the given fields on the subscription item of ``user_id``.

    Args:
        user_id: Partition key of the subscription item.
        updates: Mapping of attribute name to new value. An empty mapping is
            a no-op.
    """
    if not updates:
        # "SET " with no assignments is not a valid UpdateExpression.
        return

    assignments = []
    names = {}
    values = {}
    for index, (field, value) in enumerate(updates.items()):
        # Attribute names go through "#" placeholders because DynamoDB rejects
        # reserved words (for example "status" or "plan") used directly in an
        # expression. Indexed placeholders also cope with names containing
        # characters that are not legal inside an expression token.
        name_ref = f"#f{index}"
        value_ref = f":v{index}"
        names[name_ref] = field
        values[value_ref] = value
        assignments.append(f"{name_ref} = {value_ref}")

    _table_subscriptions.update_item(
        Key={"user_id": user_id},
        UpdateExpression="SET " + ", ".join(assignments),
        ExpressionAttributeNames=names,
        ExpressionAttributeValues=values,
    )
199
+
200
+
201
def find_user_by_stripe_customer(stripe_customer_id: str) -> str | None:
    """Look up the user_id linked to a Stripe customer ID.

    Queries the ``stripe-customer-index`` index and returns the ``user_id``
    of the first match, or ``None`` when there is no match.
    """
    by_customer = Key("stripe_customer_id").eq(stripe_customer_id)
    result = _table_subscriptions.query(
        IndexName="stripe-customer-index",
        KeyConditionExpression=by_customer,
    )
    matches = result.get("Items", [])
    if not matches:
        return None
    return matches[0]["user_id"]
209
+
210
+
211
+ # ─── Transcripts ─────────────────────────────────────────────────────────────
212
+
213
+
214
def save_transcript(user_id: str, job: dict):
    """Persist a transcript job record for ``user_id``.

    The ``job`` dict is modified in place before being written: ``user_id``
    is always set, and ``created_at`` is filled with the current UTC
    timestamp when the caller did not provide one.
    """
    job["user_id"] = user_id
    job.setdefault("created_at", datetime.now(UTC).isoformat())
    _table_transcripts.put_item(Item=job)
220
+
221
+
222
def list_transcripts(user_id: str, limit: int = 50) -> list[dict]:
    """Return up to ``limit`` transcript jobs of ``user_id``.

    The query runs with ``ScanIndexForward=False``, i.e. in descending
    sort-key order, and returns the stored items unchanged.
    """
    owned_by_user = Key("user_id").eq(user_id)
    result = _table_transcripts.query(
        KeyConditionExpression=owned_by_user,
        ScanIndexForward=False,
        Limit=limit,
    )
    return result.get("Items", [])
230
+
231
+
232
def get_transcript(user_id: str, created_at: str) -> dict | None:
    """Fetch one transcript job by its ``(user_id, created_at)`` key.

    Returns the stored item, or ``None`` when no such item exists.
    """
    primary_key = {"user_id": user_id, "created_at": created_at}
    result = _table_transcripts.get_item(Key=primary_key)
    return result.get("Item")
aavaaz/api/metrics.py ADDED
@@ -0,0 +1,41 @@
1
+ """
2
+ Prometheus metrics endpoint for Aavaaz.
3
+
4
+ Wraps WhisperLive's metrics module and adds Aavaaz-specific metrics.
5
+ """
6
+
7
+ from fastapi import APIRouter
8
+ from fastapi.responses import Response
9
+ from prometheus_client import Counter, Gauge, Histogram, generate_latest
10
+
11
# Router on which the /metrics endpoint of this module is registered.
router = APIRouter()

# Aavaaz-level metrics
# Count of transcription requests, labelled by "method" and "status".
TRANSCRIPTION_REQUESTS = Counter(
    "aavaaz_transcription_requests_total",
    "Total transcription requests",
    ["method", "status"],
)
# Distribution of transcription time in seconds; buckets span 0.1 s to 120 s.
TRANSCRIPTION_DURATION = Histogram(
    "aavaaz_transcription_duration_seconds",
    "Time spent transcribing audio",
    buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60, 120],
)
# Number of WebSocket connections that are currently open.
ACTIVE_CONNECTIONS = Gauge(
    "aavaaz_active_websocket_connections",
    "Currently active WebSocket connections",
)
# Count of plugin processing errors, labelled by "plugin_name".
PLUGIN_ERRORS = Counter(
    "aavaaz_plugin_errors_total",
    "Plugin processing errors",
    ["plugin_name"],
)
33
+
34
+
35
@router.get("/metrics")
async def metrics():
    """Prometheus metrics endpoint.

    Returns the output of ``generate_latest()`` (called without arguments,
    i.e. for the library's default registry) as the response body.
    """
    # Use the library's own content-type constant rather than a hard-coded
    # string, so the header always matches the exposition format that the
    # installed prometheus_client version actually emits.
    from prometheus_client import CONTENT_TYPE_LATEST

    return Response(
        content=generate_latest(),
        media_type=CONTENT_TYPE_LATEST,
    )