arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
  2. arize_phoenix-12.28.1.dist-info/RECORD +499 -0
  3. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
  4. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
  5. phoenix/__generated__/__init__.py +0 -0
  6. phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
  7. phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
  8. phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
  9. phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
  10. phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
  11. phoenix/__init__.py +5 -4
  12. phoenix/auth.py +39 -2
  13. phoenix/config.py +1763 -91
  14. phoenix/datetime_utils.py +120 -2
  15. phoenix/db/README.md +595 -25
  16. phoenix/db/bulk_inserter.py +145 -103
  17. phoenix/db/engines.py +140 -33
  18. phoenix/db/enums.py +3 -12
  19. phoenix/db/facilitator.py +302 -35
  20. phoenix/db/helpers.py +1000 -65
  21. phoenix/db/iam_auth.py +64 -0
  22. phoenix/db/insertion/dataset.py +135 -2
  23. phoenix/db/insertion/document_annotation.py +9 -6
  24. phoenix/db/insertion/evaluation.py +2 -3
  25. phoenix/db/insertion/helpers.py +17 -2
  26. phoenix/db/insertion/session_annotation.py +176 -0
  27. phoenix/db/insertion/span.py +15 -11
  28. phoenix/db/insertion/span_annotation.py +3 -4
  29. phoenix/db/insertion/trace_annotation.py +3 -4
  30. phoenix/db/insertion/types.py +50 -20
  31. phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
  32. phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
  33. phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
  34. phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
  35. phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
  36. phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
  37. phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
  38. phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
  39. phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
  40. phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
  41. phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
  42. phoenix/db/models.py +669 -56
  43. phoenix/db/pg_config.py +10 -0
  44. phoenix/db/types/model_provider.py +4 -0
  45. phoenix/db/types/token_price_customization.py +29 -0
  46. phoenix/db/types/trace_retention.py +23 -15
  47. phoenix/experiments/evaluators/utils.py +3 -3
  48. phoenix/experiments/functions.py +160 -52
  49. phoenix/experiments/tracing.py +2 -2
  50. phoenix/experiments/types.py +1 -1
  51. phoenix/inferences/inferences.py +1 -2
  52. phoenix/server/api/auth.py +38 -7
  53. phoenix/server/api/auth_messages.py +46 -0
  54. phoenix/server/api/context.py +100 -4
  55. phoenix/server/api/dataloaders/__init__.py +79 -5
  56. phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
  57. phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
  58. phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
  59. phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
  60. phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
  61. phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
  62. phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
  63. phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
  64. phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
  65. phoenix/server/api/dataloaders/dataset_labels.py +36 -0
  66. phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
  67. phoenix/server/api/dataloaders/document_evaluations.py +6 -9
  68. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
  69. phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
  70. phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
  71. phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
  72. phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
  73. phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
  74. phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
  75. phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
  76. phoenix/server/api/dataloaders/record_counts.py +37 -10
  77. phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
  78. phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
  79. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
  80. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
  81. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
  82. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
  83. phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
  84. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
  85. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
  86. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
  87. phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
  88. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
  89. phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
  90. phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
  91. phoenix/server/api/dataloaders/span_costs.py +29 -0
  92. phoenix/server/api/dataloaders/table_fields.py +2 -2
  93. phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
  94. phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
  95. phoenix/server/api/dataloaders/types.py +29 -0
  96. phoenix/server/api/exceptions.py +11 -1
  97. phoenix/server/api/helpers/dataset_helpers.py +5 -1
  98. phoenix/server/api/helpers/playground_clients.py +1243 -292
  99. phoenix/server/api/helpers/playground_registry.py +2 -2
  100. phoenix/server/api/helpers/playground_spans.py +8 -4
  101. phoenix/server/api/helpers/playground_users.py +26 -0
  102. phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
  103. phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
  104. phoenix/server/api/helpers/prompts/models.py +205 -22
  105. phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
  106. phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
  107. phoenix/server/api/input_types/CreateProjectInput.py +27 -0
  108. phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
  109. phoenix/server/api/input_types/DatasetFilter.py +17 -0
  110. phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
  111. phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
  112. phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
  113. phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
  114. phoenix/server/api/input_types/PromptFilter.py +14 -0
  115. phoenix/server/api/input_types/PromptVersionInput.py +52 -1
  116. phoenix/server/api/input_types/SpanSort.py +44 -7
  117. phoenix/server/api/input_types/TimeBinConfig.py +23 -0
  118. phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
  119. phoenix/server/api/input_types/UserRoleInput.py +1 -0
  120. phoenix/server/api/mutations/__init__.py +10 -0
  121. phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
  122. phoenix/server/api/mutations/api_key_mutations.py +19 -23
  123. phoenix/server/api/mutations/chat_mutations.py +154 -47
  124. phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
  125. phoenix/server/api/mutations/dataset_mutations.py +21 -16
  126. phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
  127. phoenix/server/api/mutations/experiment_mutations.py +2 -2
  128. phoenix/server/api/mutations/export_events_mutations.py +3 -3
  129. phoenix/server/api/mutations/model_mutations.py +210 -0
  130. phoenix/server/api/mutations/project_mutations.py +49 -10
  131. phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
  132. phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
  133. phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
  134. phoenix/server/api/mutations/prompt_mutations.py +65 -129
  135. phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
  136. phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
  137. phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
  138. phoenix/server/api/mutations/trace_mutations.py +47 -3
  139. phoenix/server/api/mutations/user_mutations.py +66 -41
  140. phoenix/server/api/queries.py +768 -293
  141. phoenix/server/api/routers/__init__.py +2 -2
  142. phoenix/server/api/routers/auth.py +154 -88
  143. phoenix/server/api/routers/ldap.py +229 -0
  144. phoenix/server/api/routers/oauth2.py +369 -106
  145. phoenix/server/api/routers/v1/__init__.py +24 -4
  146. phoenix/server/api/routers/v1/annotation_configs.py +23 -31
  147. phoenix/server/api/routers/v1/annotations.py +481 -17
  148. phoenix/server/api/routers/v1/datasets.py +395 -81
  149. phoenix/server/api/routers/v1/documents.py +142 -0
  150. phoenix/server/api/routers/v1/evaluations.py +24 -31
  151. phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
  152. phoenix/server/api/routers/v1/experiment_runs.py +337 -59
  153. phoenix/server/api/routers/v1/experiments.py +479 -48
  154. phoenix/server/api/routers/v1/models.py +7 -0
  155. phoenix/server/api/routers/v1/projects.py +18 -49
  156. phoenix/server/api/routers/v1/prompts.py +54 -40
  157. phoenix/server/api/routers/v1/sessions.py +108 -0
  158. phoenix/server/api/routers/v1/spans.py +1091 -81
  159. phoenix/server/api/routers/v1/traces.py +132 -78
  160. phoenix/server/api/routers/v1/users.py +389 -0
  161. phoenix/server/api/routers/v1/utils.py +3 -7
  162. phoenix/server/api/subscriptions.py +305 -88
  163. phoenix/server/api/types/Annotation.py +90 -23
  164. phoenix/server/api/types/ApiKey.py +13 -17
  165. phoenix/server/api/types/AuthMethod.py +1 -0
  166. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
  167. phoenix/server/api/types/CostBreakdown.py +12 -0
  168. phoenix/server/api/types/Dataset.py +226 -72
  169. phoenix/server/api/types/DatasetExample.py +88 -18
  170. phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
  171. phoenix/server/api/types/DatasetLabel.py +57 -0
  172. phoenix/server/api/types/DatasetSplit.py +98 -0
  173. phoenix/server/api/types/DatasetVersion.py +49 -4
  174. phoenix/server/api/types/DocumentAnnotation.py +212 -0
  175. phoenix/server/api/types/Experiment.py +264 -59
  176. phoenix/server/api/types/ExperimentComparison.py +5 -10
  177. phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
  178. phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
  179. phoenix/server/api/types/ExperimentRun.py +169 -65
  180. phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
  181. phoenix/server/api/types/GenerativeModel.py +245 -3
  182. phoenix/server/api/types/GenerativeProvider.py +70 -11
  183. phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
  184. phoenix/server/api/types/ModelInterface.py +16 -0
  185. phoenix/server/api/types/PlaygroundModel.py +20 -0
  186. phoenix/server/api/types/Project.py +1278 -216
  187. phoenix/server/api/types/ProjectSession.py +188 -28
  188. phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
  189. phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
  190. phoenix/server/api/types/Prompt.py +119 -39
  191. phoenix/server/api/types/PromptLabel.py +42 -25
  192. phoenix/server/api/types/PromptVersion.py +11 -8
  193. phoenix/server/api/types/PromptVersionTag.py +65 -25
  194. phoenix/server/api/types/ServerStatus.py +6 -0
  195. phoenix/server/api/types/Span.py +167 -123
  196. phoenix/server/api/types/SpanAnnotation.py +189 -42
  197. phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
  198. phoenix/server/api/types/SpanCostSummary.py +10 -0
  199. phoenix/server/api/types/SystemApiKey.py +65 -1
  200. phoenix/server/api/types/TokenPrice.py +16 -0
  201. phoenix/server/api/types/TokenUsage.py +3 -3
  202. phoenix/server/api/types/Trace.py +223 -51
  203. phoenix/server/api/types/TraceAnnotation.py +149 -50
  204. phoenix/server/api/types/User.py +137 -32
  205. phoenix/server/api/types/UserApiKey.py +73 -26
  206. phoenix/server/api/types/node.py +10 -0
  207. phoenix/server/api/types/pagination.py +11 -2
  208. phoenix/server/app.py +290 -45
  209. phoenix/server/authorization.py +38 -3
  210. phoenix/server/bearer_auth.py +34 -24
  211. phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
  212. phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
  213. phoenix/server/cost_tracking/helpers.py +68 -0
  214. phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
  215. phoenix/server/cost_tracking/regex_specificity.py +397 -0
  216. phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
  217. phoenix/server/daemons/__init__.py +0 -0
  218. phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
  219. phoenix/server/daemons/generative_model_store.py +103 -0
  220. phoenix/server/daemons/span_cost_calculator.py +99 -0
  221. phoenix/server/dml_event.py +17 -0
  222. phoenix/server/dml_event_handler.py +5 -0
  223. phoenix/server/email/sender.py +56 -3
  224. phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
  225. phoenix/server/email/types.py +11 -0
  226. phoenix/server/experiments/__init__.py +0 -0
  227. phoenix/server/experiments/utils.py +14 -0
  228. phoenix/server/grpc_server.py +11 -11
  229. phoenix/server/jwt_store.py +17 -15
  230. phoenix/server/ldap.py +1449 -0
  231. phoenix/server/main.py +26 -10
  232. phoenix/server/oauth2.py +330 -12
  233. phoenix/server/prometheus.py +66 -6
  234. phoenix/server/rate_limiters.py +4 -9
  235. phoenix/server/retention.py +33 -20
  236. phoenix/server/session_filters.py +49 -0
  237. phoenix/server/static/.vite/manifest.json +55 -51
  238. phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
  239. phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
  240. phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
  241. phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
  242. phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
  243. phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
  244. phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
  245. phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
  246. phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
  247. phoenix/server/templates/index.html +40 -6
  248. phoenix/server/thread_server.py +1 -2
  249. phoenix/server/types.py +14 -4
  250. phoenix/server/utils.py +74 -0
  251. phoenix/session/client.py +56 -3
  252. phoenix/session/data_extractor.py +5 -0
  253. phoenix/session/evaluation.py +14 -5
  254. phoenix/session/session.py +45 -9
  255. phoenix/settings.py +5 -0
  256. phoenix/trace/attributes.py +80 -13
  257. phoenix/trace/dsl/helpers.py +90 -1
  258. phoenix/trace/dsl/query.py +8 -6
  259. phoenix/trace/projects.py +5 -0
  260. phoenix/utilities/template_formatters.py +1 -1
  261. phoenix/version.py +1 -1
  262. arize_phoenix-10.0.4.dist-info/RECORD +0 -405
  263. phoenix/server/api/types/Evaluation.py +0 -39
  264. phoenix/server/cost_tracking/cost_lookup.py +0 -255
  265. phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
  266. phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
  267. phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
  268. phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
  269. phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
  270. phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
  271. phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
  272. phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
  273. phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
  274. phoenix/utilities/deprecation.py +0 -31
  275. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
  276. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
phoenix/server/ldap.py ADDED
@@ -0,0 +1,1449 @@
1
+ """LDAP authentication for Phoenix.
2
+
3
+ Provides LDAP/Active Directory authentication following RFC 4510-4519 standards.
4
+
5
+ TLS Connection Modes:
6
+ LDAP supports three connection security modes:
7
+
8
+ 1. LDAPS (LDAP over TLS, port 636):
9
+ - TLS established at TCP connection layer (like HTTPS)
10
+ - Server created with use_ssl=True
11
+ - Bind credentials encrypted from the start
12
+ - No protocol-level upgrade needed
13
+
14
+ 2. STARTTLS (Upgrade to TLS, port 389):
15
+ - Connection starts as plaintext on standard LDAP port
16
+ - Client sends Extended Request (OID 1.3.6.1.4.1.1466.20037) to upgrade
17
+ - Server upgrades connection to TLS
18
+ - All subsequent data (including bind) encrypted
19
+ - CRITICAL: Must call start_tls() BEFORE sending credentials
20
+
21
+ 3. Plaintext (No encryption, testing only):
22
+ - All data transmitted unencrypted
23
+ - Not recommended for production
24
+
25
+ Advanced TLS Configuration:
26
+ Phoenix supports enterprise TLS requirements via optional configuration:
27
+
28
+ - Custom CA Certificates (tls_ca_cert_file):
29
+ For LDAP servers using private/internal certificate authorities
30
+ not present in the system's default trust store.
31
+
32
+ - Client Certificates (tls_client_cert_file, tls_client_key_file):
33
+ For mutual TLS (mTLS) authentication where the LDAP server
34
+ requires client certificate validation.
35
+
36
+ Security Considerations:
37
+ This module implements multiple layers of defense against LDAP-specific attacks:
38
+
39
+ - Anonymous Bind Prevention (RFC 4513 §5.1.2):
40
+ Empty passwords are rejected before any LDAP operation. Many LDAP servers
41
+ treat empty-password binds as "unauthenticated" (anonymous), which would
42
+ allow attackers to bypass authentication entirely.
43
+
44
+ - LDAP Injection Prevention (RFC 4515):
45
+ All user input is escaped before insertion into LDAP filters using
46
+ escape_filter_chars(). This prevents filter manipulation attacks like
47
+ username="*" or "admin)(uid=*".
48
+
49
+ - Referral Following Disabled:
50
+ ldap3 defaults to auto_referrals=True, which follows LDAP referrals to
51
+ ANY server and sends bind credentials automatically. An attacker who can
52
+ inject a referral response could steal service account credentials.
53
+ Phoenix disables this (auto_referrals=False) and relies on explicit
54
+ multi-server configuration for high availability instead.
55
+
56
+ - Timing Attack Mitigation:
57
+ When a user is not found, a dummy bind is performed to equalize response
58
+ times with the "wrong password" case, preventing username enumeration.
59
+
60
+ - Exception Sanitization:
61
+ LDAP exception messages may contain sensitive information (server IPs,
62
+ DNs, configuration details). Only the exception type is logged.
63
+
64
+ Implementation Notes:
65
+ The ldap3 library requires explicit handling of STARTTLS via:
66
+ - AUTO_BIND_TLS_BEFORE_BIND constant for automatic bind flows
67
+ - Manual start_tls() call for explicit bind flows
68
+
69
+ Using auto_bind=True (AUTO_BIND_NO_TLS) with STARTTLS configuration
70
+ will transmit credentials in PLAINTEXT despite TLS being "enabled".
71
+ This is a critical security vulnerability that this module guards against.
72
+
73
+ Thread Safety Note:
74
+ This implementation uses ldap3's default SYNC strategy (not SAFE_SYNC) because
75
+ Connection objects are created fresh for each authentication request and never
76
+ shared between threads. The SAFE_SYNC strategy would only be required if we
77
+ introduced connection pooling or reused Connection objects across requests.
78
+ Server objects ARE thread-safe (ldap3 uses internal locking for message IDs).
79
+
80
+ Note: ldap3's "ASYNC" strategy uses OS threads, NOT Python async/await coroutines.
81
+ The library has no native asyncio support—all strategies perform blocking socket I/O.
82
+ We therefore run ldap3 in a thread pool via anyio.to_thread.run_sync() to avoid
83
+ blocking the FastAPI event loop (see authenticate() docstring for details).
84
+
85
+ Known Limitations:
86
+ - No connection pooling: Each authentication creates fresh connections.
87
+ For very high-volume deployments (>100 auth/sec), consider adding ldap3
88
+ connection pooling or an external LDAP proxy (e.g., HAProxy).
89
+
90
+ - No pagination for group searches: POSIX mode group searches may be
91
+ truncated if the directory contains >1000 matching groups per search base.
92
+ Most deployments won't hit this limit.
93
+
94
+ - No nested group resolution: Active Directory nested groups (group-in-group)
95
+ require recursive memberOf queries or LDAP_MATCHING_RULE_IN_CHAIN (OID
96
+ 1.2.840.113556.1.4.1941). Currently only direct group memberships are
97
+ resolved. Configure flattened groups or use AD's tokenGroups attribute
98
+ if nested resolution is required.
99
+
100
+ See Also:
101
+ _create_servers(): Server-level TLS configuration (use_ssl, tls)
102
+ _establish_connection(): Connection-level TLS upgrade (AUTO_BIND modes)
103
+ _verify_user_password(): Manual TLS upgrade sequencing
104
+ """
105
+
106
+ from __future__ import annotations
107
+
108
+ import logging
109
+ import random
110
+ import ssl
111
+ from hashlib import md5
112
+ from secrets import token_hex
113
+ from typing import Any, Final, Literal, NamedTuple, cast, overload
114
+
115
+ import anyio
116
+ from anyio import CapacityLimiter
117
+ from ldap3 import (
118
+ AUTO_BIND_DEFAULT,
119
+ AUTO_BIND_NO_TLS,
120
+ AUTO_BIND_NONE,
121
+ AUTO_BIND_TLS_BEFORE_BIND,
122
+ NONE,
123
+ SUBTREE,
124
+ Connection,
125
+ Entry,
126
+ Server,
127
+ Tls,
128
+ )
129
+ from ldap3.core.exceptions import LDAPException, LDAPInvalidCredentialsResult, LDAPInvalidDnError
130
+ from ldap3.core.results import RESULT_SIZE_LIMIT_EXCEEDED
131
+ from ldap3.utils.conv import escape_filter_chars
132
+ from ldap3.utils.dn import parse_dn
133
+
134
+ from phoenix.config import AssignableUserRoleName, LDAPConfig
135
+
136
logger = logging.getLogger(__name__)

# Cap on simultaneous LDAP operations, to prevent thread pool exhaustion.
# ldap3 is synchronous-only, so every LDAP authentication spawns a thread.
# The cap is a safety valve against credential stuffing attacks or runaway
# retry loops, not a throughput target; 10 concurrent operations is more than
# sufficient for typical Phoenix deployments.
_LDAP_CONCURRENCY_LIMIT: Final[int] = 10
_ldap_limiter: CapacityLimiter | None = None


def _get_ldap_limiter() -> CapacityLimiter:
    """Return the module-wide LDAP concurrency limiter, building it on first use.

    The limiter is created lazily rather than at import time because a
    CapacityLimiter must be constructed inside an async context (it uses the
    current event loop), and no event loop exists yet while the module loads.

    Returns:
        The single CapacityLimiter shared by all callers, sized by
        _LDAP_CONCURRENCY_LIMIT.
    """
    global _ldap_limiter
    limiter = _ldap_limiter
    if limiter is not None:
        return limiter
    # First call: build the limiter and cache it for every later caller.
    limiter = CapacityLimiter(_LDAP_CONCURRENCY_LIMIT)
    _ldap_limiter = limiter
    return limiter
158
+
159
+
160
def canonicalize_dn(dn: str) -> str | None:
    r"""Return the canonical form of a Distinguished Name (RFC 4514).

    Semantically equivalent DNs (e.g. "cn=John" vs "CN=john") are mapped to
    one canonical string so they compare equal and do not produce duplicate
    database entries. Canonicalization covers:
        - Case: attribute types and values are lowercased
        - Whitespace: stripped around = and ,
        - Multi-valued RDNs: components sorted for deterministic output
        - Escaped characters: preserved (\, \+ etc.)
        - Hex encoding: normalized (decoded to canonical form)

    RFC 4514 treats DNs as case-insensitive for comparison but leaves the
    canonical form implementation-defined; this function fixes one such form.

    Args:
        dn: LDAP Distinguished Name to canonicalize

    Returns:
        str: Canonical lowercase DN with normalized whitespace and sorted RDN
            components. An empty or whitespace-only DN yields "".
        None: If the DN cannot be parsed. Callers must handle this explicitly
            (typically by skipping the DN) to avoid inconsistent matching.

    Examples:
        >>> canonicalize_dn("cn=John,ou=Users,dc=Example,dc=com")
        'cn=john,ou=users,dc=example,dc=com'

        >>> canonicalize_dn("CN=john+EMAIL=john@corp.com,OU=users,DC=example,DC=com")
        'cn=john+email=john@corp.com,ou=users,dc=example,dc=com'

        >>> canonicalize_dn("email=john@corp.com+cn=John,ou=Users,dc=Example,dc=com")
        'cn=john+email=john@corp.com,ou=users,dc=example,dc=com'

        >>> canonicalize_dn("invalid dn syntax") is None
        True

    References:
        RFC 4514 Section 4: String representation of DNs are case-insensitive
        ldap3 parse_dn(): Validates syntax and decomposes into components
    """
    # The empty DN (root DSE) is valid per RFC 4514; nothing to parse.
    if not dn.strip():
        return ""

    try:
        parsed = parse_dn(dn, escape=True, strip=True)
    except LDAPInvalidDnError:
        # Deliberately no "just lowercase it" fallback: that could give the
        # same DN different canonical forms depending on parser behavior.
        return None

    rdns: list[str] = []
    pending: list[tuple[str, str]] = []

    for attr_type, attr_value, separator in parsed:
        pending.append((attr_type.lower(), attr_value.lower()))

        # Any other separator means more components of the same
        # multi-valued RDN follow, so keep collecting.
        if separator not in (",", ""):
            continue

        # A comma (or end of input) closes the RDN. Sorting by (type, value)
        # makes "email=x+cn=y" and "cn=y+email=x" both come out as
        # "cn=y+email=x", and also orders duplicate attribute types.
        rdns.append("+".join(f"{attr}={value}" for attr, value in sorted(pending)))
        pending = []

    return ",".join(rdns)
239
+
240
+
241
# Unicode marker that tags LDAP users in the oauth2_client_id column.
# U+E000 is in the Private Use Area - guaranteed never to be assigned by the
# Unicode Standard.
#
# Design Context:
#   Phoenix's user table was originally designed for OAuth2 providers, using
#   oauth2_client_id to identify the authentication source (e.g., "google",
#   "github"). LDAP users need a distinct marker to differentiate them from
#   OAuth users without requiring a database schema migration.
#
# The "(stopgap)" Suffix:
#   Signals that this is a temporary solution. A future schema change should
#   add a dedicated identity_provider column (enum: "local", "ldap", "oauth2",
#   etc.) with oauth2_client_id nullable only for OAuth users. This marker
#   enables LDAP support without blocking on that migration.
#
# Why U+E000?
#   Private Use Area characters cannot appear in legitimate OAuth client IDs,
#   so the marker cannot collide with a real OAuth provider. It is also
#   unlikely to be accidentally typed or injected.
LDAP_CLIENT_ID_MARKER: Final[str] = "\ue000LDAP(stopgap)"


def is_ldap_user(oauth2_client_id: str | None) -> bool:
    """Report whether an oauth2_client_id belongs to an LDAP user.

    LDAP-authenticated users are told apart from OAuth2-authenticated users by
    the LDAP_CLIENT_ID_MARKER prefix on their client ID. Callers use this to
    apply LDAP-specific logic (e.g., re-authentication flows, password change
    handling).

    Args:
        oauth2_client_id: The OAuth2 client ID to check (can be None)

    Returns:
        True if the client ID starts with LDAP_CLIENT_ID_MARKER; False for any
        other value, including None and the empty string.

    Example:
        >>> is_ldap_user("\\ue000LDAP(stopgap):user-unique-id")
        True
        >>> is_ldap_user("google-oauth2|12345")
        False
        >>> is_ldap_user(None)
        False
    """
    # None and "" carry no marker, so they are never LDAP users.
    if not oauth2_client_id:
        return False
    return oauth2_client_id.startswith(LDAP_CLIENT_ID_MARKER)
286
+
287
+
288
# Marker for null email values in the database.
#
# When LDAP directories don't have email attributes, Phoenix generates a
# deterministic marker to satisfy the database's NOT NULL constraint. The
# marker starts with a Private Use Area (PUA) Unicode character so it cannot
# collide with any real email address.
#
# Format: "\ue000NULL(stopgap)" + md5(unique_id)
#   - \uE000: PUA character (guaranteed never assigned by Unicode Standard)
#   - NULL: Human-readable indicator that email is absent
#   - md5(unique_id): Deterministic hash for uniqueness (32 hex chars)
#
# Design Context:
#   This is a temporary bridge solution. The eventual solution is to make the
#   email column nullable in the database schema. Until then, this marker
#   enables LDAP authentication for directories without email attributes.
#
# Security Note:
#   MD5 is used for deterministic uniqueness, NOT cryptographic security.
#   The hash ensures the same unique_id always produces the same marker,
#   preventing race conditions on concurrent logins.
NULL_EMAIL_MARKER_PREFIX: Final[str] = "\ue000NULL(stopgap)"


def generate_null_email_marker(unique_id: str) -> str:
    """Generate a deterministic null email marker from a unique_id.

    The marker is a placeholder for LDAP users whose directories don't have
    email attributes. It satisfies the database's NOT NULL constraint while
    remaining programmatically distinguishable from real emails.

    The marker is deterministic: the same unique_id (compared
    case-insensitively) always produces the same marker. This prevents race
    conditions when the same user logs in concurrently from multiple sessions.

    Args:
        unique_id: The LDAP unique identifier (objectGUID, entryUUID, etc.)
            Must be non-empty.

    Returns:
        NULL_EMAIL_MARKER_PREFIX followed by the 32-character lowercase hex
        MD5 digest of the lowercased unique_id.

    Raises:
        ValueError: If unique_id is empty or None.

    Example:
        >>> marker = generate_null_email_marker("550E8400-E29B-41D4-A716-446655440000")
        >>> marker.startswith(NULL_EMAIL_MARKER_PREFIX)
        True
        >>> len(marker) - len(NULL_EMAIL_MARKER_PREFIX)
        32
        >>> marker == generate_null_email_marker("550e8400-e29b-41d4-a716-446655440000")
        True
    """
    if not unique_id:
        raise ValueError("unique_id is required to generate null email marker")

    # Normalize to lowercase for consistent hashing (UUIDs are case-insensitive)
    normalized = unique_id.lower()
    # usedforsecurity=False: the digest is only a uniqueness key. Without the
    # flag, FIPS-restricted Python/OpenSSL builds refuse to construct MD5 and
    # raise ValueError. The resulting digest is identical either way.
    digest = md5(normalized.encode(), usedforsecurity=False).hexdigest()
    return f"{NULL_EMAIL_MARKER_PREFIX}{digest}"
345
+
346
+
347
+ def is_null_email_marker(email: str) -> bool:
348
+ """Check if an email value is a null email marker.
349
+
350
+ This function identifies placeholder values that were generated for LDAP
351
+ users whose directories don't have email attributes. Used to:
352
+ - Hide placeholder emails in the UI
353
+ - Skip email operations (welcome emails, password reset)
354
+ - Validate that users aren't trying to log in with marker values
355
+
356
+ Args:
357
+ email: The email value to check
358
+
359
+ Returns:
360
+ True if the value is a null email marker, False otherwise.
361
+
362
+ Example:
363
+ >>> is_null_email_marker("\\ue000NULL(stopgap)7f3d2a1b9c8e4f5da2b6c903e1f47d8b")
364
+ True
365
+ >>> is_null_email_marker("alice@example.com")
366
+ False
367
+ """
368
+ return email.startswith(NULL_EMAIL_MARKER_PREFIX)
369
+
370
+
371
class LDAPUserInfo(NamedTuple):
    """Authenticated LDAP user information.

    Immutable record returned by a successful LDAP authentication.

    Attributes:
        email: User's email address, or None if PHOENIX_LDAP_ATTR_EMAIL is "null".
            When None, a null email marker will be generated from unique_id.
        display_name: User's display name for UI
        groups: Tuple of group DNs the user belongs to (immutable)
        user_dn: User's Distinguished Name (for audit/logging, NOT used for identity matching)
        ldap_username: Username used to authenticate
        role: Phoenix role mapped from LDAP groups
        unique_id: Immutable identifier (objectGUID/entryUUID). Required when email is None.
    """

    # None only in no-email mode (no email attribute configured).
    email: str | None
    # Falls back to the login username when the directory has no display name.
    display_name: str
    groups: tuple[str, ...]
    user_dn: str
    # The username exactly as supplied at login (not the escaped filter value).
    ldap_username: str
    role: str
    unique_id: str | None = None  # objectGUID (AD), entryUUID (OpenLDAP) if configured
392
+
393
+
394
+ class LDAPAuthenticator:
395
+ """Handles LDAP authentication and user attribute retrieval.
396
+
397
+ Supports both Active Directory and OpenLDAP:
398
+ - Active Directory: Uses memberOf attribute for group membership
399
+ - OpenLDAP/POSIX: Uses group search with member attribute
400
+ - Multi-server failover for high availability
401
+ - TLS/LDAPS with certificate validation (RFC 4513)
402
+ - Group-based role mapping with wildcard support
403
+ """
404
+
405
# Maximum credential lengths to prevent DoS via oversized inputs.
# These are generous limits - real usernames/passwords are much shorter.
# Lengths are measured with len() on the str; longer inputs are rejected by
# _authenticate() before any filter escaping or LDAP connection is attempted.
_MAX_USERNAME_LENGTH: Final[int] = 256
_MAX_PASSWORD_LENGTH: Final[int] = 1024
409
+
410
def __init__(self, config: LDAPConfig):
    """Store the LDAP configuration and pre-build the server list.

    Args:
        config: LDAP configuration including servers, search bases, and mappings
    """
    self.config = config
    # Built once from the configuration above and reused for every login.
    prepared_servers = self._create_servers()
    self.servers = prepared_servers
418
+
419
def _create_servers(self) -> list[Server]:
    """Build one ldap3 Server object per configured host.

    TLS Configuration Modes (selected by config.tls_mode):

        1. LDAPS (tls_mode="ldaps", port 636):
           - Server: use_ssl=True, tls=<Tls config>
           - TLS established at TCP connection layer (like HTTPS)
           - Bind credentials encrypted from the start
           - No start_tls() call needed

        2. STARTTLS (tls_mode="starttls", port 389):
           - Server: use_ssl=False, tls=<Tls config>
           - Connection starts plaintext, upgraded to TLS via start_tls()
           - Bind credentials encrypted ONLY after start_tls() completes
           - CRITICAL: Must call start_tls() before bind (see _establish_connection)

        3. Plaintext (tls_mode="none", testing only):
           - Server: use_ssl=False, tls=None
           - All data transmitted unencrypted
           - NOT recommended for production

    Key ldap3 Parameters:
        use_ssl: Enable TLS at connection layer (True for LDAPS only)
        tls: TLS configuration object shared by every Server
            - Set for both LDAPS and STARTTLS (start_tls() uses this config)
            - None for plaintext mode
            - Optional extras: custom CA bundle (ca_certs_file) and a client
              certificate/key pair for mutual TLS
              (local_certificate_file, local_private_key_file)

    Returns:
        list[Server]: Server objects for all configured hosts (supports failover).
    """
    cfg = self.config

    tls_settings = None
    if cfg.tls_mode != "none":
        # Certificate validation follows tls_verify; extras are added only when set.
        tls_options: dict[str, Any] = {
            "validate": ssl.CERT_REQUIRED if cfg.tls_verify else ssl.CERT_NONE
        }
        if cfg.tls_ca_cert_file:
            # Custom CA certificate for private/internal CAs
            tls_options["ca_certs_file"] = cfg.tls_ca_cert_file
        if cfg.tls_client_cert_file and cfg.tls_client_key_file:
            # Client certificate for mutual TLS (needs both certificate and key)
            tls_options["local_certificate_file"] = cfg.tls_client_cert_file
            tls_options["local_private_key_file"] = cfg.tls_client_key_file
        tls_settings = Tls(**tls_options)

    # Only LDAPS wraps the socket in TLS at connect time.
    implicit_tls = cfg.tls_mode == "ldaps"
    return [
        Server(
            host,
            port=cfg.port,
            use_ssl=implicit_tls,
            tls=tls_settings,
            connect_timeout=10,
            get_info=NONE,  # Don't fetch schema/DSA info we don't use
        )
        for host in cfg.hosts
    ]
487
+
488
def _establish_connection(self, server: Server) -> Connection:
    """Establish the search connection (service account or anonymous) to a server.

    Connection Flow by TLS Mode:
        STARTTLS Mode (tls_mode="starttls"):
            1. Open plaintext TCP connection (port 389)
            2. Send Extended Request to upgrade to TLS
            3. Perform TLS handshake
            4. Send bind credentials (now encrypted)

            Implementation: AUTO_BIND_TLS_BEFORE_BIND ensures steps 2-3 happen
            before step 4.

        LDAPS Mode (tls_mode="ldaps"):
            1. Establish TLS connection (port 636)
            2. Send bind credentials (already encrypted)

            Implementation: AUTO_BIND_NO_TLS (TLS already active from Server)

        Plaintext Mode (tls_mode="none"):
            1. Open plaintext TCP connection (port 389)
            2. Send bind credentials (unencrypted)

            Implementation: AUTO_BIND_NO_TLS (no TLS to upgrade)

    ldap3 auto_bind Modes:
        AUTO_BIND_TLS_BEFORE_BIND: Call start_tls(), then bind
            - Required for STARTTLS to encrypt credentials
        AUTO_BIND_NO_TLS: Bind immediately without calling start_tls()
            - Correct for LDAPS (TLS already active via use_ssl=True)
            - Correct for plaintext (no TLS desired)

    Security Note:
        For STARTTLS, using auto_bind=True (or AUTO_BIND_NO_TLS) would
        transmit bind credentials in PLAINTEXT before upgrading to TLS.
        This is a critical security vulnerability. For the same reason the
        manual (anonymous) path treats a falsy start_tls() result as a hard
        failure instead of silently continuing on a plaintext socket.

    Bind Types:
        Service account: Uses config.bind_dn and config.bind_password
        Anonymous: No credentials (for servers allowing anonymous reads)

    Args:
        server: Server object (from _create_servers) with TLS pre-configured.

    Returns:
        Connection: Bound service-account connection, or an opened (and, for
        STARTTLS, upgraded) anonymous connection whose bind is deferred until
        the caller enters it as a context manager.

    Raises:
        LDAPException: If the connection, TLS upgrade, or bind fails.
    """
    uses_starttls = self.config.tls_mode == "starttls"

    # Determine auto_bind mode based on TLS configuration
    # CRITICAL: Must use AUTO_BIND_TLS_BEFORE_BIND for STARTTLS to encrypt passwords
    auto_bind_mode: Literal["DEFAULT", "NONE", "NO_TLS", "TLS_BEFORE_BIND", "TLS_AFTER_BIND"]
    if uses_starttls:
        auto_bind_mode = AUTO_BIND_TLS_BEFORE_BIND
    else:
        # LDAPS: TLS already active via use_ssl=True on Server, bind normally
        # Plaintext (none): No TLS, bind normally
        auto_bind_mode = AUTO_BIND_NO_TLS

    if self.config.bind_dn and self.config.bind_password:
        # Service account bind using ldap3's auto_bind feature.
        #
        # Socket Cleanup Note (ldap3 library behavior):
        # ldap3's auto_bind has inconsistent socket cleanup on failure:
        # - LDAPS mode: _cleanup_socket() called on TLS wrap failure (base.py:292)
        # - STARTTLS mode: NO cleanup if wrap_socket raises in _start_tls (tls.py:287-291)
        # - bind() failure: NO cleanup, exception propagates up
        # - start_tls() returns False: unbind() IS called (connection.py:424)
        #
        # If the constructor raises (during _do_auto_bind), the socket may leak until
        # Python's GC collects the Connection object. This is acceptable because:
        # 1. GC will eventually close the socket (Connection has no __del__, but socket does)
        # 2. This only affects service account bind during TLS/bind failures (rare)
        # 3. Phoenix has timeouts (10s connect, 30s operations) preventing hangs
        # 4. Rate limiting prevents attackers from rapidly triggering many leaks
        #
        # The anonymous bind path below has explicit cleanup because we control the
        # sequencing. For service account binds, we rely on ldap3's auto_bind which
        # handles the common success case correctly.
        #
        # See: https://github.com/cannatag/ldap3 for upstream library
        return Connection(
            server,
            user=self.config.bind_dn,
            password=self.config.bind_password,
            auto_bind=auto_bind_mode,
            raise_exceptions=True,
            receive_timeout=30,  # Timeout for LDAP operations (bind, search)
            # SECURITY: Disable referral following to prevent credential leakage.
            # ldap3 defaults to following referrals to ANY server and sending credentials.
            # An attacker-controlled referral could steal service account credentials.
            # Phoenix already has multi-server failover, so referrals are unnecessary.
            auto_referrals=False,
        )

    # Anonymous bind case - must manually sequence open/start_tls before bind
    #
    # AUTO_BIND_DEFAULT defers bind() until the context manager is entered.
    # This is NOT the same as AUTO_BIND_NONE (which skips bind entirely).
    #
    # Why not AUTO_BIND_TLS_BEFORE_BIND?
    # That performs open->start_tls->bind atomically in the constructor.
    # For anonymous binds (no user/password), we need manual sequencing
    # to ensure start_tls() completes before bind(). The sequence is:
    #
    # 1. Connection() - creates connection object (no network I/O)
    # 2. open() - establishes TCP connection
    # 3. start_tls() - upgrades to TLS (for STARTTLS mode)
    # 4. return conn - caller uses `with conn:` which triggers bind()
    #
    # This ensures TLS is active before any bind or search traffic is sent.
    conn = Connection(
        server,
        auto_bind=AUTO_BIND_DEFAULT,
        raise_exceptions=True,
        receive_timeout=30,
        # SECURITY: Disable referral following (see service account connection above)
        auto_referrals=False,
    )
    try:
        conn.open()
        # Upgrade to TLS for STARTTLS mode before any bind operations.
        # start_tls() reports failure either by raising or by returning a falsy
        # value; ldap3's own auto-bind treats the latter as an error, so do the
        # same here rather than continue on an un-upgraded socket.
        if uses_starttls and not conn.start_tls():
            raise LDAPException("STARTTLS upgrade did not complete; refusing to continue")
        return conn
    except Exception:
        # CRITICAL: Unbind on any exception to prevent socket leak
        # Threat: open() or start_tls() may open a socket but raise before bind.
        # Without cleanup, repeated TLS handshake failures would leak file descriptors
        # and eventually exhaust the process (DoS). unbind() safely closes socket
        # even if connection was never bound.
        conn.unbind()  # type: ignore[no-untyped-call]
        raise
619
+
620
async def authenticate(self, username: str, password: str) -> LDAPUserInfo | None:
    """Authenticate user against LDAP and return user info.

    This method performs the following steps:
    1. Connect to LDAP server (with failover if multiple servers configured)
    2. Bind with service account (if configured) or anonymously
    3. Search for user by username
    4. Authenticate user (bind with user's credentials)
    5. Retrieve user attributes (email, display name)
    6. Query user's group memberships
    7. Map groups to Phoenix role

    Performance & Security - Thread Pool Isolation:
        All LDAP operations (connection, TLS handshake, bind, search) are executed
        in a thread pool to prevent blocking the FastAPI event loop.

        Why this matters (DoS prevention):
        - ldap3 library is synchronous-only (blocks calling thread)
        - Without isolation, each /auth/ldap/login blocks the event loop
        - Attacker opens slow TLS handshakes -> starves all FastAPI workers
        - Even rate-limited requests would queue indefinitely

        Mitigation: anyio.to_thread.run_sync() runs LDAP ops in background threads,
        keeping the main event loop responsive for other requests.

    Timeout Architecture (Defense-in-Depth), outermost layer first:
        - Request deadline: 60s (anyio.fail_after). When exceeded, this method
          returns None (a clean authentication failure, not an error response)
          and the worker thread is abandoned: it keeps running in the
          background and its result is discarded.
        - Thread pool task: no direct timeout. Runs until the LDAP operation
          completes or a socket timeout fires.
        - LDAP operation: 30s (receive_timeout). Bind, search, and other LDAP
          protocol operations. This is the timeout that actually stops the thread.
        - TCP connect: 10s (connect_timeout). Initial socket connection.

        Rationale for each layer:
        - TCP Connect (10s): Network unreachable or firewall should fail fast
        - LDAP Operation (30s): Bind/search should complete quickly; slow response
          indicates server overload.
        - Request deadline (60s): Prevents the client from hanging indefinitely.
          Threads blocked in socket I/O cannot be cancelled, so the deadline
          only releases the waiting coroutine.

    Multi-Server Failover & Load Distribution:
        When multiple LDAP servers are configured, they are shuffled randomly on each
        authentication attempt. This provides load distribution across replicas and
        prevents a slow primary from always causing delays. Failover to the next
        server occurs on LDAPException (connection failure, timeout, etc.).

        Each server attempt can take up to 30s (receive_timeout) if the server
        accepts TCP but doesn't respond to LDAP ops.

        With N unresponsive servers: N x 30s total time before all servers exhausted.
        - 1 server: 30s max (well within the 60s deadline)
        - 2 servers: 60s max (equals the deadline - may return before 2nd completes)
        - 3+ servers: exceeds 60s (deadline fires, not all servers tried)

        This is an intentional trade-off: the 60s deadline prioritizes client
        experience over exhaustively trying all servers. In practice, if multiple
        servers are all unresponsive, the infrastructure has larger problems. The
        60s limit also aligns with common load balancer timeouts (nginx, AWS ALB).

    Security:
        - Empty username/password rejected (prevents anonymous bind bypass)
        - LDAP injection prevention via RFC 4515 escaping (blocks filter manipulation)
        - Exception sanitization (no internal server details leaked to attackers)
        - Thread pool isolation prevents event loop DoS (slow LDAP can't block other requests)
        - Timeouts prevent resource exhaustion (hanging threads would accumulate)
        - Socket cleanup prevents file descriptor leaks (failed binds close connections)
        - Timing attack mitigation via dummy bind (prevents username enumeration)

    Args:
        username: LDAP username (e.g., "jdoe" for Active Directory sAMAccountName)
        password: User's password

    Returns:
        LDAPUserInfo object or None if authentication fails (including timeout)
    """
    # Run synchronous ldap3 operations in thread pool to avoid blocking event loop.
    #
    # Concurrency limiting: _get_ldap_limiter() caps concurrent LDAP operations to
    # prevent thread pool exhaustion during traffic spikes. Requests exceeding the
    # limit will wait (not fail) until a slot is available.
    #
    # Timeout handling: by default to_thread.run_sync() shields the awaiting task
    # from cancellation until the worker thread has finished, which would make
    # fail_after() ineffective (the request would still wait for the thread).
    # abandon_on_cancel=True lets the deadline release this coroutine while the
    # thread itself keeps running until receive_timeout=30 on the Connection
    # objects ends its blocking socket operation. Abandoned threads stop counting
    # against the limiter once the wait is cancelled.
    # NOTE: abandon_on_cancel needs anyio >= 4.1 (older releases call it
    # `cancellable`).
    # We catch TimeoutError to return a clean authentication failure rather than
    # propagating a 500 error.
    try:
        with anyio.fail_after(60):
            return await anyio.to_thread.run_sync(
                self._authenticate,
                username,
                password,
                abandon_on_cancel=True,
                limiter=_get_ldap_limiter(),
            )
    except TimeoutError:
        # LDAP operation exceeded 60s timeout. This typically means:
        # 1. LDAP server is overloaded or unresponsive
        # 2. Network issues causing slow responses
        # 3. Very slow TLS handshake (e.g., OCSP/CRL checks)
        #
        # The background thread continues running until socket timeout (30s),
        # but we return immediately to the client. Log as error for monitoring.
        logger.error(
            "LDAP authentication timed out after 60 seconds. "
            "Check LDAP server health and network connectivity."
        )
        return None
740
+
741
def _authenticate(self, username: str, password: str) -> LDAPUserInfo | None:
    """Synchronous LDAP authentication (called from thread pool via authenticate()).

    Validates the credentials' shape, then tries the configured servers in random
    order. On the first server that answers, it searches for the user, verifies
    the password with a separate bind, reads the configured attributes, resolves
    group memberships and maps them to a Phoenix role.

    Args:
        username: Login name as typed by the user (escaped before use in filters).
        password: Password to verify.

    Returns:
        LDAPUserInfo on success. None when the credentials are rejected, the
        user is unknown or ambiguous, a required attribute is missing, no
        group maps to a role, or every server failed with an LDAPException.
    """
    # SECURITY: Reject empty credentials to prevent anonymous bind bypass
    # Threat: LDAP RFC 4513 §5.1.2 defines Simple Authentication with empty password
    # as "unauthenticated". Many LDAP servers grant anonymous read access for empty
    # password (bind succeeds with DN but no actual authentication). An attacker could
    # send empty password to bypass authentication if we don't explicitly check.
    if not username or not username.strip():
        logger.warning("LDAP authentication rejected: empty username")
        return None
    if not password:
        logger.warning("LDAP authentication rejected: empty password")
        return None

    # SECURITY: Reject oversized credentials to prevent DoS
    # Threat: Attacker sends megabyte-sized username/password to waste memory,
    # CPU (escaping, filter building), and LDAP server resources.
    if len(username) > self._MAX_USERNAME_LENGTH:
        logger.warning("LDAP authentication rejected: username too long")
        return None
    if len(password) > self._MAX_PASSWORD_LENGTH:
        logger.warning("LDAP authentication rejected: password too long")
        return None

    # SECURITY: Prevent LDAP filter injection (RFC 4515)
    # Attack: username="*" or "admin*" or "admin)(uid=*" could bypass authentication
    # or enumerate users. escape_filter_chars() escapes special LDAP filter characters:
    # * -> \2a, ( -> \28, ) -> \29, \ -> \5c, NUL -> \00
    escaped_username = escape_filter_chars(username)

    # Shuffle servers for load distribution across replicas.
    # Since LDAP servers are assumed to be replicas with identical data,
    # randomizing the order prevents the first server from receiving all
    # initial requests and provides more even load distribution.
    servers = random.sample(self.servers, len(self.servers))
    for server in servers:
        conn: Connection | None = None
        try:
            # Step 1: Create connection with service account (or anonymous)
            conn = self._establish_connection(server)
            # Entering the context triggers the deferred bind of the anonymous
            # connection; a service-account connection is already bound.
            with conn:
                # Step 2 & 3: Search for user
                user_entry = self._search_user(conn, escaped_username)
                if not user_entry:
                    # TIMING ATTACK MITIGATION: Perform dummy bind to prevent username
                    # enumeration
                    #
                    # Without this, an attacker could distinguish "user not found" from
                    # "wrong password" by measuring response times:
                    # - User not found: Fast response (only search performed)
                    # - Wrong password: Slow response (search + bind attempt)
                    #
                    # By always performing a bind operation (even with dummy credentials
                    # when user doesn't exist), both code paths take similar time,
                    # preventing attackers from enumerating valid usernames.
                    #
                    # The dummy DN is intentionally invalid and will always fail bind,
                    # but the network round-trip and TLS operations equalize timing.
                    self._dummy_bind_for_timing(server, password)
                    logger.info("User not found in LDAP directory")

                    # DESIGN DECISION: Return immediately instead of trying other servers
                    #
                    # 1. SEMANTIC CORRECTNESS (primary reason):
                    #    In a properly configured LDAP environment, "user not found" is a
                    #    definitive answer. Failover servers are replicas of the same
                    #    directory and should have identical user sets. Multi-server
                    #    failover is designed for server unavailability
                    #    (LDAPException -> continue), not for data inconsistency
                    #    between replicas.
                    #
                    # 2. EDGE CASES (replica lag, AD GC/DC differences):
                    #    Temporary inconsistencies can occur during replication, but
                    #    these are rare and transient. Designing around them would add
                    #    complexity for little practical benefit, and could mask
                    #    underlying infrastructure issues that should be addressed at
                    #    the LDAP layer.
                    return None

                user_dn = user_entry.entry_dn

                # Step 4: Authenticate user by binding with their credentials
                # We use a separate connection to verify the password to avoid
                # dropping the main connection which might be needed for group search.
                if not self._verify_user_password(server, user_dn, password):
                    logger.info("LDAP password verification failed")
                    return None

                # Step 5: Extract user attributes
                # Email handling depends on whether attr_email is configured:
                # - If configured: read from LDAP, fail if missing
                # - If empty: email will be None, marker generated later
                email: str | None = None
                if self.config.attr_email:
                    email = _get_attribute(user_entry, self.config.attr_email)
                    if not email:
                        # Fail loudly: admin configured an attribute that doesn't exist
                        logger.error(
                            f"LDAP user missing required email attribute "
                            f"({self.config.attr_email}). Either populate this attribute "
                            f"or set PHOENIX_LDAP_ATTR_EMAIL=null"
                        )
                        return None
                # else: email stays None, will be handled by get_or_create_ldap_user

                display_name = (
                    _get_attribute(user_entry, self.config.attr_display_name)
                    if self.config.attr_display_name
                    else None
                )

                # Extract unique_id if configured (objectGUID, entryUUID, etc.)
                unique_id: str | None = None
                if self.config.attr_unique_id:
                    unique_id = _get_unique_id(user_entry, self.config.attr_unique_id)
                    if not unique_id:
                        # Fail loudly: user explicitly configured unique_id, so missing
                        # attribute indicates misconfiguration (likely typo). Don't silently
                        # fall back to email - that would mask the error.
                        logger.error(
                            f"LDAP user missing configured unique_id attribute "
                            f"({self.config.attr_unique_id}). "
                            f"Check PHOENIX_LDAP_ATTR_UNIQUE_ID "
                            f"spelling. Common values: objectGUID (AD), entryUUID (OpenLDAP)."
                        )
                        return None

                # Step 6: Get user's group memberships
                # Reuses the existing service/anonymous connection
                groups = self._get_user_groups(conn, user_entry, username)

                # Step 7: Map groups to Phoenix role
                role = self.map_groups_to_role(groups)
                if not role:
                    logger.info(
                        "LDAP authentication denied: user not member of any configured group. "
                        "Configure PHOENIX_LDAP_GROUP_ROLE_MAPPINGS to include user's groups."
                    )
                    return None

                return LDAPUserInfo(
                    email=email,
                    display_name=display_name or username,
                    groups=tuple(groups),
                    user_dn=user_dn,
                    ldap_username=username,
                    role=role,
                    unique_id=unique_id,
                )

        except LDAPException as e:
            # SECURITY: Don't leak internal LDAP server error details
            # Threat: Exception messages may contain sensitive info (server IPs, DNs,
            # configuration details, internal paths). Only log error type (e.g.,
            # "LDAPSocketOpenError") to avoid information disclosure to attackers
            # monitoring logs or error responses.
            logger.warning(
                f"LDAP server {server.host} failed during authentication. "
                f"Error type: {type(e).__name__}"
            )
            continue  # Try next server
        finally:
            # CRITICAL: Always close the search connection explicitly.
            # ldap3's context manager restores the connection to the state it had
            # before the `with` block: a service-account connection (bound by
            # auto_bind) stays bound and open, and an anonymous connection (opened
            # before the block) is re-opened after its unbind. Relying on `with`
            # alone would therefore leave one socket open per login attempt until
            # garbage collection. unbind() is safe regardless of bind state.
            if conn is not None:
                try:
                    conn.unbind()  # type: ignore[no-untyped-call]
                except Exception as cleanup_error:
                    # Best-effort cleanup: never let a close failure change the
                    # authentication outcome or trigger failover.
                    logger.debug(
                        "Ignoring error while closing LDAP connection. "
                        f"Error type: {type(cleanup_error).__name__}"
                    )

    # All servers failed
    logger.error("All LDAP servers failed")
    return None
905
+
906
def _search_user(self, conn: Connection, escaped_username: str) -> Entry | None:
    """Look the user up in each configured search base, in order.

    The first base that yields exactly one entry wins, which lets organizations
    with users in multiple OUs (e.g., employees and contractors) authenticate
    against a single LDAP configuration.

    Args:
        conn: Active LDAP connection
        escaped_username: Username already escaped for use in a search filter

    Returns:
        The single matching entry, or None if no base contains the user or a
        base returns more than one match (ambiguous).
    """
    cfg = self.config
    search_filter = cfg.user_search_filter.replace("%s", escaped_username)

    # Request only the attributes that are configured; unset ones (None or "",
    # e.g. attr_email in no-email mode) are dropped.
    configured_attrs = (
        cfg.attr_email,
        cfg.attr_display_name,
        cfg.attr_member_of,
        cfg.attr_unique_id,
        cfg.group_search_filter_user_attr,
    )
    requested_attrs = list(filter(None, configured_attrs))

    for base_dn in cfg.user_search_base_dns:
        conn.search(
            search_base=base_dn,
            search_filter=search_filter,
            search_scope=SUBTREE,
            attributes=requested_attrs,
        )
        match_count = len(conn.entries)

        if match_count == 1:
            # Exactly one match - success
            return cast(Entry, conn.entries[0])

        if match_count > 1:
            # SECURITY: Reject ambiguous results to prevent non-deterministic
            # authentication. Taking the first of several matches would make the
            # outcome depend on the server's arbitrary result ordering, which can
            # change between queries or replicas.
            logger.error(
                f"Ambiguous LDAP search: found {match_count} matching entries "
                f"in search base '{base_dn}'. Rejecting authentication for safety. "
                f"Fix: Use more specific user_search_filter to ensure unique results."
            )
            return None

        # No match in this base: fall through to the next one.

    logger.info("LDAP user search returned no results in any configured search base")
    return None
966
+
967
def _dummy_bind_for_timing(self, server: Server, password: str) -> None:
    """Run a throwaway bind so "user not found" costs the same as "wrong password".

    Timing Attack Prevention:
        This method exists solely to prevent username enumeration via timing.

        Without it, an attacker could compare response times:
        1. Existing user + wrong password: search, then bind attempt (slower)
        2. Unknown user: search only, immediate return (faster)
        and use the difference to discover valid usernames.

    Mitigation:
        When a user is not found, a bind is still attempted against a
        known-invalid DN. Both code paths then perform the same kind of
        network I/O (TLS handshake, bind round-trip), which is where the
        time goes, so the responses become indistinguishable. The bind is
        expected to fail; only its duration matters.

    Args:
        server: LDAP server to connect to (same as real bind).
        password: User-provided password (used for realistic timing).
    """
    # A fresh random suffix per call prevents the LDAP server from caching or
    # short-circuiting repeated lookups of the same bogus DN.
    random_suffix = token_hex(8)
    bogus_dn = "cn=dummy-" + random_suffix + ",dc=invalid,dc=local"
    try:
        self._verify_user_password(server, bogus_dn, password)
    except Exception:
        # Expected to fail - the outcome is irrelevant, only the elapsed time.
        return None
    return None
1003
+
1004
def _verify_user_password(self, server: Server, user_dn: str, password: str) -> bool:
    """Verify user's password by attempting to bind as that user.

    TLS Sequencing for STARTTLS:
        Unlike _establish_connection (which uses AUTO_BIND_TLS_BEFORE_BIND),
        this method uses manual sequencing:

        1. Create connection with auto_bind disabled
        2. Open connection (plaintext for STARTTLS)
        3. Call start_tls() explicitly (upgrade to TLS) and check its result
        4. Call bind() with user credentials (now encrypted)

    Why Manual Sequencing?
        We need explicit error handling between open() and bind() to ensure
        socket cleanup in the finally block. AUTO_BIND_TLS_BEFORE_BIND would
        combine steps 2-4 into a single auto_bind call, hiding exceptions and
        making it harder to guarantee socket cleanup on partial failures.
        Manual sequencing gives us fine-grained control over error paths.

    TLS Modes:
        STARTTLS: start_tls() called before bind() to encrypt credentials
        LDAPS: TLS already active from Server (use_ssl=True), bind directly
        Plaintext: No TLS, bind directly (testing only)

    Security Note:
        Skipping start_tls() for STARTTLS mode would transmit the password
        in plaintext despite TLS being "enabled" in configuration. The same
        holds if start_tls() reports failure by returning a falsy value
        instead of raising, so that case is turned into an LDAPException and
        the bind is never attempted.

    Exception Handling:
        LDAPInvalidCredentialsResult is caught and returns False (wrong password).
        Other LDAPExceptions (server errors, timeouts, failed TLS upgrade) are
        raised to trigger failover to the next server in _authenticate().

    Args:
        server: Server object with TLS pre-configured.
        user_dn: User's Distinguished Name (e.g., "uid=alice,ou=users,dc=example,dc=com").
        password: User's password to verify.

    Returns:
        bool: True if bind succeeds (password valid), False otherwise.

    Raises:
        LDAPException: For connection/server errors (NOT invalid credentials).
    """
    user_conn = Connection(
        server,
        user=user_dn,
        password=password,
        auto_bind=AUTO_BIND_NONE,  # No auto-bind; we call open/start_tls/bind manually
        raise_exceptions=True,
        receive_timeout=30,  # Timeout for bind operation
        # SECURITY: Disable referral following to prevent credential leakage
        auto_referrals=False,
    )
    try:
        user_conn.open()
        # CRITICAL: Upgrade to TLS BEFORE sending password for STARTTLS mode.
        # ldap3's own auto-bind treats a falsy start_tls() result as a failure;
        # mirror that so the password is never sent over an un-upgraded socket.
        if self.config.tls_mode == "starttls" and not user_conn.start_tls():
            raise LDAPException("STARTTLS upgrade did not complete; refusing to send password")
        user_conn.bind()
        return user_conn.bound
    except LDAPInvalidCredentialsResult:
        # Wrong password - return False instead of raising.
        # This prevents invalid credentials from triggering server failover
        # in _authenticate() (failover is for server errors, not auth failures).
        return False
    finally:
        # CRITICAL: Always unbind to prevent socket leak
        # Threat: If open() or start_tls() or bind() raises, connection has an open
        # socket but bound=False. Conditional cleanup (if user_conn.bound: unbind())
        # would skip cleanup, leaking the file descriptor. Repeated failed logins
        # would exhaust process FD limit (typically 1024) causing service crash.
        # unbind() safely closes socket regardless of bind state.
        user_conn.unbind()  # type: ignore[no-untyped-call]
1078
+
1079
+ def _get_user_groups(self, conn: Connection, user_entry: Entry, username: str) -> list[str]:
1080
+ """Get user's group memberships.
1081
+
1082
+ Two modes are supported, determined by group_search_filter presence:
1083
+
1084
+ AD Mode (group_search_filter NOT set):
1085
+ Reads the memberOf attribute directly from the user entry.
1086
+ This is the recommended approach for Active Directory, which
1087
+ automatically populates memberOf with the user's group DNs.
1088
+
1089
+ Search Mode (group_search_filter IS set):
1090
+ Searches for groups that contain the user. Used for POSIX groups
1091
+ (posixGroup) or when memberOf is not available.
1092
+
1093
+ The %s placeholder in the filter is replaced with:
1094
+ - If group_search_filter_user_attr is set: That attribute's value
1095
+ from the user entry (e.g., uid="jdoe" or distinguishedName="...")
1096
+ - If not set: The login username directly
1097
+
1098
+ Common patterns:
1099
+ - POSIX (memberUid=%s): memberUid contains usernames like "jdoe"
1100
+ → Use username directly (default) or group_search_filter_user_attr=uid
1101
+ - groupOfNames (member=%s): member contains full DNs
1102
+ → Requires group_search_filter_user_attr=distinguishedName (AD only)
1103
+
1104
+ Size Limit Warning:
1105
+ If the LDAP server's size limit is exceeded (commonly 1000 entries),
1106
+ a warning is logged and partial results are returned. This can cause
1107
+ users to receive incorrect role mappings if their groups are not in
1108
+ the returned subset. Configure more specific group_search_base_dns
1109
+ or increase the server's sizelimit if this occurs.
1110
+
1111
+ Args:
1112
+ conn: Active LDAP connection (with service account if configured)
1113
+ user_entry: User entry from search
1114
+ username: User's login username (used as default filter value)
1115
+
1116
+ Returns:
1117
+ List of group DNs (Distinguished Names)
1118
+ """
1119
+ # Mode determined by group_search_filter presence
1120
+ if not self.config.group_search_filter:
1121
+ if not self.config.attr_member_of:
1122
+ return []
1123
+ # AD mode: Read memberOf attribute from user entry
1124
+ member_of = _get_attribute(user_entry, self.config.attr_member_of, multiple=True)
1125
+ return member_of if member_of else []
1126
+
1127
+ # POSIX mode: Search for groups containing this user
1128
+ groups: list[str] = []
1129
+ group_search_filter = self.config.group_search_filter
1130
+ if self.config.group_search_base_dns:
1131
+ # Determine what value to substitute for %s in the filter
1132
+ # - If group_search_filter_user_attr is set: Use that attribute's value
1133
+ # (e.g., "uid" -> "admin")
1134
+ # - If not set: Use the username
1135
+ #
1136
+ # POSIX memberUid contains usernames ("admin"), not full DNs.
1137
+ if self.config.group_search_filter_user_attr:
1138
+ # Get the specified attribute value from the user entry
1139
+ filter_value = _get_attribute(user_entry, self.config.group_search_filter_user_attr)
1140
+ if not filter_value:
1141
+ # Attribute not found on user - can't search for groups
1142
+ attr = self.config.group_search_filter_user_attr
1143
+ logger.warning(
1144
+ f"User entry missing attribute '{attr}' required for group search filter"
1145
+ )
1146
+ return []
1147
+ else:
1148
+ # use the username
1149
+ filter_value = username
1150
+
1151
+ # SECURITY: Escape value for LDAP filter (RFC 4515)
1152
+ # Threat: Values can contain special chars like parentheses, asterisks, backslashes
1153
+ # (e.g., "user(contractor)*"). If inserted into filter unescaped,
1154
+ # these could break filter syntax or allow injection. Always escape before
1155
+ # string substitution, even though value comes from trusted LDAP server.
1156
+ escaped_value = escape_filter_chars(filter_value)
1157
+ group_filter = group_search_filter.replace("%s", escaped_value)
1158
+
1159
+ # Search each group base DN and collect groups from all
1160
+ for group_search_base in self.config.group_search_base_dns:
1161
+ try:
1162
+ conn.search(
1163
+ search_base=group_search_base,
1164
+ search_filter=group_filter,
1165
+ search_scope=SUBTREE,
1166
+ attributes=["cn"],
1167
+ )
1168
+
1169
+ # Check if results were truncated by server's size limit
1170
+ # ldap3 doesn't raise for sizeLimitExceeded, it returns partial results
1171
+ if conn.result and conn.result.get("result") == RESULT_SIZE_LIMIT_EXCEEDED:
1172
+ logger.warning(
1173
+ f"LDAP group search hit server size limit for base "
1174
+ f"'{group_search_base}'. Results may be incomplete. "
1175
+ f"Consider using more specific group_search_base_dns or "
1176
+ f"increasing the server's sizelimit."
1177
+ )
1178
+
1179
+ for group_entry in conn.entries:
1180
+ groups.append(group_entry.entry_dn)
1181
+ except LDAPException as e:
1182
+ # SECURITY: Don't leak internal LDAP server error details
1183
+ logger.warning(
1184
+ f"LDAP group search failed for base '{group_search_base}'. "
1185
+ f"Error type: {type(e).__name__}"
1186
+ )
1187
+
1188
+ return groups
1189
+
1190
def map_groups_to_role(self, group_dns: list[str]) -> AssignableUserRoleName | None:
    """Translate a user's LDAP group DNs into a Phoenix role.

    Matching rules:
        - Mappings are evaluated in configuration order; the first mapping
          that matches decides the role.
        - A mapping whose group_dn is "*" matches every user.
        - Both sides are compared in canonical form (via canonicalize_dn), so
          case, spacing, ordering and escaping differences do not matter.

    Why "first match wins" rather than "highest role wins":
        Configuration order, not a built-in role hierarchy, determines
        priority. This mirrors Grafana's LDAP behaviour and the convention of
        firewall rules, nginx location blocks and ACLs:

        1. Administrators keep explicit control over precedence instead of
           being tied to a fixed ADMIN > MEMBER > VIEWER ranking.
        2. The outcome is predictable and easy to debug: read the config
           from top to bottom.
        3. It is the behaviour administrators of similar systems expect.
        4. No role hierarchy has to be defined or maintained if roles are
           added later.

        The trade-off is that a badly ordered configuration can grant lower
        access than intended; this is accepted because the ordering is
        explicit and auditable.

    Recommended configuration (highest privilege first):
        [
            {"group_dn": "cn=admins,ou=groups,dc=example,dc=com", "role": "ADMIN"},
            {"group_dn": "cn=developers,ou=groups,dc=example,dc=com", "role": "MEMBER"},
            {"group_dn": "*", "role": "VIEWER"}  # Catch-all fallback
        ]

    Args:
        group_dns: List of LDAP group DNs the user is a member of.

    Returns:
        Phoenix role name (ADMIN, MEMBER, VIEWER) or None if no mapping matches.

    See Also:
        Grafana's equivalent implementation:
        https://github.com/grafana/grafana/blob/main/pkg/services/ldap/ldap.go
        (buildGrafanaUser function, "only use the first match for each org" comment)
    """
    # Canonicalize the user's groups once up front so that each mapping
    # check is a plain set lookup.
    normalized_memberships: set[str] = set()
    for raw_dn in group_dns:
        normalized = canonicalize_dn(raw_dn)
        if normalized is None:
            # Unparseable DN: skip it, and keep the DN itself out of the log
            # because it may contain sensitive information.
            logger.warning(
                "Failed to canonicalize group DN from LDAP server. "
                "This group will be ignored for role mapping. "
                "This may indicate malformed data in the LDAP directory."
            )
            continue
        normalized_memberships.add(normalized)

    # Walk the mappings in configured priority order; the first hit wins.
    for rule in self.config.group_role_mappings:
        rule_dn, rule_role = rule["group_dn"], rule["role"]
        if _is_member_of(normalized_memberships, rule_dn):
            # Role was already validated and upper-cased at config load.
            return rule_role

    # Nothing matched - deny access.
    return None
1273
+
1274
+
1275
+ @overload
1276
+ def _get_attribute(
1277
+ entry: Entry, attr_name: str, multiple: Literal[False] = False
1278
+ ) -> str | None: ...
1279
+
1280
+
1281
+ @overload
1282
+ def _get_attribute(entry: Entry, attr_name: str, multiple: Literal[True]) -> list[str] | None: ...
1283
+
1284
+
1285
+ def _get_attribute(entry: Entry, attr_name: str, multiple: bool = False) -> str | list[str] | None:
1286
+ """Safely extract attribute value from LDAP entry.
1287
+
1288
+ Args:
1289
+ entry: LDAP entry object
1290
+ attr_name: Attribute name to extract
1291
+ multiple: If True, return list of values; otherwise return first value
1292
+
1293
+ Returns:
1294
+ Attribute value(s) or None if not present
1295
+ """
1296
+ if not attr_name:
1297
+ return None
1298
+
1299
+ attr = getattr(entry, attr_name, None)
1300
+ if attr is None:
1301
+ return None
1302
+
1303
+ values = attr.values if hasattr(attr, "values") else []
1304
+ if not values:
1305
+ return None
1306
+
1307
+ if multiple:
1308
+ return list(values)
1309
+ return str(values[0])
1310
+
1311
+
1312
+ def _get_unique_id(entry: Entry, attr_name: str) -> str | None:
1313
+ """Extract unique identifier attribute, handling binary values.
1314
+
1315
+ Different LDAP servers store unique identifiers in different formats:
1316
+
1317
+ - Active Directory objectGUID: Binary (16 bytes, mixed-endian)
1318
+ - OpenLDAP entryUUID: String (RFC 4530)
1319
+ - 389 DS nsUniqueId: String
1320
+
1321
+ This method handles both binary and string formats, returning a
1322
+ standard UUID string representation for consistency.
1323
+
1324
+ IMPORTANT - Database Compatibility:
1325
+ The returned string is used as a database key for user lookup.
1326
+ To ensure consistent matching:
1327
+ - Output is always lowercase (UUIDs are case-insensitive per RFC 4122)
1328
+ - Whitespace is stripped
1329
+ - Empty values return None
1330
+
1331
+ If an existing database entry has different casing (e.g., uppercase
1332
+ from an older version), the user will be found via email fallback
1333
+ and their unique_id will be updated on next login.
1334
+
1335
+ Active Directory objectGUID Binary Format (MS-DTYP §2.3.4):
1336
+ Microsoft's GUID structure uses mixed-endian byte ordering:
1337
+
1338
+ | Field | Size | Endianness | Wire bytes for "2212e4c7-..." |
1339
+ |-------|---------|---------------|-------------------------------|
1340
+ | Data1 | 4 bytes | Little-endian | c7 e4 12 22 |
1341
+ | Data2 | 2 bytes | Little-endian | 1e 05 |
1342
+ | Data3 | 2 bytes | Little-endian | 0c 4d |
1343
+ | Data4 | 8 bytes | Big-endian | 9a 5b 12 77 0a 9b b7 ab |
1344
+
1345
+ Python's uuid.UUID(bytes_le=...) expects exactly this format.
1346
+
1347
+ References:
1348
+ - MS-DTYP §2.3.4: https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/001eec5a-7f8b-4293-9e21-ca349392db40
1349
+ - MS-ADA3 objectGUID: https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-ada3/937eb5c6-f6b3-4652-a276-5d6bb8979658
1350
+ - RFC 4530 entryUUID: https://www.rfc-editor.org/rfc/rfc4530.html
1351
+
1352
+ Args:
1353
+ entry: LDAP entry object from ldap3
1354
+ attr_name: Attribute name (e.g., "objectGUID", "entryUUID")
1355
+
1356
+ Returns:
1357
+ String representation of the unique ID (lowercase UUID format),
1358
+ or None if not present or empty
1359
+ """ # noqa: E501
1360
+ if not attr_name:
1361
+ return None
1362
+
1363
+ attr = getattr(entry, attr_name, None)
1364
+ if attr is None:
1365
+ return None
1366
+
1367
+ # Get raw value - could be bytes (objectGUID) or str (entryUUID)
1368
+ # ldap3's decode_raw_vals (search.py:410-411) returns:
1369
+ # - [bytes(val) for val in vals] if vals has items (always bytes, never str)
1370
+ # - None if vals is empty/falsy (NOT an empty list)
1371
+ # The `and attr.raw_values` check handles both None and empty list cases.
1372
+ raw_value = attr.raw_values[0] if hasattr(attr, "raw_values") and attr.raw_values else None
1373
+ if raw_value is None:
1374
+ return None
1375
+
1376
+ # Handle binary values (AD objectGUID is 16 bytes)
1377
+ # ldap3 always returns bytes, but we accept bytearray/memoryview for defensive coding
1378
+ if isinstance(raw_value, (bytes, bytearray, memoryview)):
1379
+ raw_bytes = bytes(raw_value) # Normalize to bytes for uuid.UUID
1380
+
1381
+ # Empty bytes should return None, not empty string
1382
+ if len(raw_bytes) == 0:
1383
+ return None
1384
+
1385
+ if len(raw_bytes) == 16:
1386
+ import uuid
1387
+
1388
+ # ASSUMPTION: 16-byte values are binary UUIDs (e.g., AD objectGUID).
1389
+ # Custom 16-character string IDs are NOT supported - see LDAPConfig docs.
1390
+ #
1391
+ # MS-DTYP §2.3.4: GUID uses mixed-endian format
1392
+ # Data1/Data2/Data3 are little-endian, Data4 is big-endian
1393
+ # Python's bytes_le parameter handles this correctly
1394
+ # Note: uuid.UUID always returns lowercase
1395
+ return str(uuid.UUID(bytes_le=raw_bytes))
1396
+ else:
1397
+ # Non-16-byte value: likely a string UUID (e.g., OpenLDAP entryUUID)
1398
+ # OpenLDAP stores entryUUID as string "550e8400-e29b-41d4-a716-446655440000"
1399
+ # which comes as bytes b"550e8400-..." (36 bytes) - decode as UTF-8
1400
+ try:
1401
+ decoded = raw_bytes.decode("utf-8").strip()
1402
+ # Return None for empty strings after stripping
1403
+ if not decoded:
1404
+ return None
1405
+ # Normalize to lowercase for consistent DB lookups
1406
+ # (UUIDs are case-insensitive per RFC 4122 §3)
1407
+ return decoded.lower()
1408
+ except UnicodeDecodeError:
1409
+ # Truly binary format we don't recognize - hex encode for safety
1410
+ # Hex is already lowercase
1411
+ return raw_bytes.hex()
1412
+
1413
+ # String value (shouldn't happen with ldap3, but handle for safety)
1414
+ result = str(raw_value).strip()
1415
+ return result.lower() if result else None
1416
+
1417
+
1418
+ def _is_member_of(canonical_user_groups: set[str], target_group: str) -> bool:
1419
+ """Check if user is member of LDAP group.
1420
+
1421
+ Matching logic:
1422
+ - Wildcard "*" matches all users (useful for default roles)
1423
+ - Case-insensitive DN comparison per RFC 4514
1424
+ - Canonical DN comparison to account for spacing/order/escape differences
1425
+
1426
+ Args:
1427
+ canonical_user_groups: Set of canonicalized group DNs the user is a member of
1428
+ target_group: Target group DN to check (or "*" for wildcard)
1429
+
1430
+ Returns:
1431
+ True if user is a member of the target group, False otherwise.
1432
+ Returns False if target_group cannot be canonicalized (configuration error).
1433
+ """
1434
+ # Wildcard matches everyone
1435
+ if target_group == "*":
1436
+ return True
1437
+
1438
+ # Canonical comparison handles ordering/spacing/escaping differences
1439
+ target_canonical = canonicalize_dn(target_group)
1440
+ if target_canonical is None:
1441
+ # Configuration error: admin-provided group DN in PHOENIX_LDAP_GROUP_ROLE_MAPPINGS
1442
+ # cannot be parsed. Log error and return False (no match) to fail safely.
1443
+ logger.error(
1444
+ "Failed to canonicalize configured group DN in PHOENIX_LDAP_GROUP_ROLE_MAPPINGS. "
1445
+ "This mapping will never match. Check DN syntax in configuration."
1446
+ )
1447
+ return False
1448
+
1449
+ return target_canonical in canonical_user_groups