salesforce-data-customcode 4.0.0__tar.gz → 4.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/PKG-INFO +1 -1
  2. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/pyproject.toml +1 -1
  3. salesforce_data_customcode-4.0.2/src/datacustomcode/__init__.py +47 -0
  4. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/client.py +2 -2
  5. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/feature_types/chunking.py +83 -45
  6. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/runtime.py +12 -5
  7. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function_utils.py +25 -3
  8. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/sf_cli.py +3 -1
  9. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/utils.py +19 -19
  10. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/writer/csv.py +5 -1
  11. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/writer/print.py +8 -3
  12. salesforce_data_customcode-4.0.2/src/datacustomcode/llm_gateway/__init__.py +22 -0
  13. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/mixin.py +29 -0
  14. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/spark/base.py +1 -1
  15. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/spark/default.py +1 -1
  16. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/chunking/payload/entrypoint.py +1 -3
  17. salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/config.json +3 -0
  18. salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/entrypoint.py +99 -0
  19. salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/files/chunking_prompt.txt +19 -0
  20. salesforce_data_customcode-4.0.2/src/datacustomcode/templates/function/example/chunking_with_llm/tests/test.json +51 -0
  21. salesforce_data_customcode-4.0.0/src/datacustomcode/__init__.py +0 -27
  22. salesforce_data_customcode-4.0.0/src/datacustomcode/proxy/client/__init__.py +0 -14
  23. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/LICENSE.txt +0 -0
  24. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/README.md +0 -0
  25. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/auth.py +0 -0
  26. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/cli.py +0 -0
  27. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/cmd.py +0 -0
  28. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/common_config.py +0 -0
  29. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/config.py +0 -0
  30. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/config.yaml +0 -0
  31. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/constants.py +0 -0
  32. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/credentials.py +0 -0
  33. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/deploy.py +0 -0
  34. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_platform_client.py +0 -0
  35. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_platform_config.py +0 -0
  36. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_predictions/__init__.py +0 -0
  37. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_predictions/base.py +0 -0
  38. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_predictions/impl/default.py +0 -0
  39. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_predictions/types.py +0 -0
  40. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/einstein_predictions_config.py +0 -0
  41. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/file/__init__.py +0 -0
  42. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/file/base.py +0 -0
  43. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/file/path/__init__.py +0 -0
  44. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/file/path/default.py +0 -0
  45. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/__init__.py +0 -0
  46. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/base.py +0 -0
  47. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/function/feature_types/__init__.py +0 -0
  48. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/__init__.py +0 -0
  49. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/base.py +0 -0
  50. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/__init__.py +0 -0
  51. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/base.py +0 -0
  52. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/reader/query_api.py +0 -0
  53. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/writer/__init__.py +0 -0
  54. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/io/writer/base.py +0 -0
  55. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway/base.py +0 -0
  56. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway/default.py +0 -0
  57. {salesforce_data_customcode-4.0.0/src/datacustomcode/llm_gateway → salesforce_data_customcode-4.0.2/src/datacustomcode/llm_gateway/types}/__init__.py +0 -0
  58. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway/types/generate_text_request.py +0 -0
  59. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway/types/generate_text_request_builder.py +0 -0
  60. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway/types/generate_text_response.py +0 -0
  61. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway/types/generate_text_response_builder.py +0 -0
  62. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/llm_gateway_config.py +0 -0
  63. {salesforce_data_customcode-4.0.0/src/datacustomcode/llm_gateway/types → salesforce_data_customcode-4.0.2/src/datacustomcode/proxy}/__init__.py +0 -0
  64. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/proxy/base.py +0 -0
  65. {salesforce_data_customcode-4.0.0/src/datacustomcode/proxy → salesforce_data_customcode-4.0.2/src/datacustomcode/proxy/client}/__init__.py +0 -0
  66. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/proxy/client/base.py +0 -0
  67. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/py.typed +0 -0
  68. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/run.py +0 -0
  69. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/scan.py +0 -0
  70. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/spark/__init__.py +0 -0
  71. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/template.py +0 -0
  72. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/__init__.py +0 -0
  73. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/.devcontainer/devcontainer.json +0 -0
  74. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/Dockerfile.dependencies +0 -0
  75. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/README.md +0 -0
  76. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/build_native_dependencies.sh +0 -0
  77. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/chunking/payload/config.json +0 -0
  78. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/chunking/requirements.txt +0 -0
  79. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/payload/config.json +0 -0
  80. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/payload/entrypoint.py +0 -0
  81. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/requirements-dev.txt +0 -0
  82. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/function/requirements.txt +0 -0
  83. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/.devcontainer/devcontainer.json +0 -0
  84. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/Dockerfile +0 -0
  85. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/Dockerfile.dependencies +0 -0
  86. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/README.md +0 -0
  87. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/account.ipynb +0 -0
  88. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/build_native_dependencies.sh +0 -0
  89. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/examples/employee_hierarchy/employee_data.csv +0 -0
  90. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/examples/employee_hierarchy/entrypoint.py +0 -0
  91. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/jupyterlab.sh +0 -0
  92. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/payload/config.json +0 -0
  93. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/payload/entrypoint.py +0 -0
  94. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/requirements-dev.txt +0 -0
  95. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/templates/script/requirements.txt +0 -0
  96. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/token_provider.py +0 -0
  97. {salesforce_data_customcode-4.0.0 → salesforce_data_customcode-4.0.2}/src/datacustomcode/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: salesforce-data-customcode
3
- Version: 4.0.0
3
+ Version: 4.0.2
4
4
  Summary: Data Cloud Custom Code SDK
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE.txt
@@ -18,7 +18,7 @@ license = "Apache-2.0"
18
18
  name = "salesforce-data-customcode"
19
19
  readme = "README.md"
20
20
  requires-python = ">=3.10,<3.12"
21
- version = "4.0.0"
21
+ version = "4.0.2"
22
22
 
23
23
  [tool.black]
24
24
  exclude = '''
@@ -0,0 +1,47 @@
1
+ # Copyright (c) 2025, Salesforce, Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ __all__ = [
17
+ "AuthType",
18
+ "Client",
19
+ "Credentials",
20
+ "PrintDataCloudWriter",
21
+ "QueryAPIDataCloudReader",
22
+ ]
23
+
24
+
25
+ def __getattr__(name: str):
26
+ """Lazy import heavy dependencies."""
27
+ if name == "Client":
28
+ from datacustomcode.client import Client
29
+
30
+ return Client
31
+ elif name == "AuthType":
32
+ from datacustomcode.credentials import AuthType
33
+
34
+ return AuthType
35
+ elif name == "Credentials":
36
+ from datacustomcode.credentials import Credentials
37
+
38
+ return Credentials
39
+ elif name == "PrintDataCloudWriter":
40
+ from datacustomcode.io.writer.print import PrintDataCloudWriter
41
+
42
+ return PrintDataCloudWriter
43
+ elif name == "QueryAPIDataCloudReader":
44
+ from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
45
+
46
+ return QueryAPIDataCloudReader
47
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -112,8 +112,8 @@ class Client:
112
112
  def __new__(
113
113
  cls,
114
114
  reader: Optional[BaseDataCloudReader] = None,
115
- writer: Optional["BaseDataCloudWriter"] = None,
116
- spark_provider: Optional["BaseSparkSessionProvider"] = None,
115
+ writer: Optional[BaseDataCloudWriter] = None,
116
+ spark_provider: Optional[BaseSparkSessionProvider] = None,
117
117
  code_type: str = "script",
118
118
  ) -> Client:
119
119
 
@@ -50,16 +50,16 @@ class ChunkType(str, Enum):
50
50
  class SearchIndexChunkingV1PrependField(BaseModel):
51
51
  """Field to prepend to chunk content"""
52
52
 
53
- dmo_name: str = Field(
54
- default="", description="Data Model Object name", examples=["udmo_1__dlm"]
53
+ dmo_name: Optional[str] = Field(
54
+ default=None, description="Data Model Object name", examples=["udmo_1__dlm"]
55
55
  )
56
- field_name: str = Field(
57
- default="",
56
+ field_name: Optional[str] = Field(
57
+ default=None,
58
58
  description="Field name to prepend",
59
59
  examples=["ResolvedFilePath__c"],
60
60
  )
61
- value: str = Field(
62
- default="",
61
+ value: Optional[str] = Field(
62
+ default=None,
63
63
  description="Field value to prepend",
64
64
  examples=["udlo_1__dll:quarterly_report.pdf"],
65
65
  )
@@ -67,20 +67,20 @@ class SearchIndexChunkingV1PrependField(BaseModel):
67
67
 
68
68
 
69
69
  class SearchIndexChunkingV1TranscriptField(BaseModel):
70
- """Field to prepend to chunk content"""
70
+ """Transcript timing and speaker metadata for audio/video documents"""
71
71
 
72
- speaker: str = Field(
73
- default="",
72
+ speaker: Optional[str] = Field(
73
+ default=None,
74
74
  description="Speaker name for audio/video transcripts",
75
75
  examples=["Agent"],
76
76
  )
77
- start_timestamp: str = Field(
78
- default="",
77
+ start_timestamp: Optional[str] = Field(
78
+ default=None,
79
79
  description="Start timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff",
80
80
  examples=["2026-03-25T02:01:24.918000"],
81
81
  )
82
- end_timestamp: str = Field(
83
- default="",
82
+ end_timestamp: Optional[str] = Field(
83
+ default=None,
84
84
  description="End timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff",
85
85
  examples=["2026-03-25T02:01:30.500000"],
86
86
  )
@@ -88,44 +88,76 @@ class SearchIndexChunkingV1TranscriptField(BaseModel):
88
88
 
89
89
 
90
90
  class SearchIndexChunkingV1Metadata(BaseModel):
91
- """Metadata for input documents"""
91
+ """Metadata for input documents."""
92
92
 
93
- type: DocumentType = Field(
94
- default=DocumentType.TEXT, description="Document type (text)", examples=["text"]
95
- )
96
- transcript_fields: SearchIndexChunkingV1TranscriptField = Field(
97
- default_factory=SearchIndexChunkingV1TranscriptField,
93
+ type: Optional[DocumentType] = Field(
94
+ default=DocumentType.TEXT,
98
95
  description=(
99
- "Transcript information. Will only be there in case of audio-video files"
96
+ "Document type of the chunk input. Currently only 'text' is supported."
100
97
  ),
98
+ examples=["text"],
101
99
  )
102
- page_number: int = Field(
103
- default=0,
104
- description="Page number in the source document (0-based)",
100
+ page_number: Optional[int] = Field(
101
+ default=None,
102
+ description=("Page number in the source document (0-based). "),
105
103
  examples=[1],
106
104
  )
105
+ transcript_fields: Optional[SearchIndexChunkingV1TranscriptField] = Field(
106
+ default=None,
107
+ description=(
108
+ "Speaker and timestamp metadata for audio/video transcripts. "
109
+ "Optional — only present when the source document is a transcript."
110
+ ),
111
+ )
107
112
  text_as_html: Optional[str] = Field(
108
113
  default=None,
109
- description="HTML representation of the document text",
114
+ description=("HTML representation of the chunk text, if available. "),
110
115
  examples=["<p>Online Remittance Instructions</p>"],
111
116
  )
112
- source_dmo_fields: Dict[str, Union[str, int]] = Field(
113
- default_factory=dict,
117
+ source_dmo_fields: Optional[Dict[str, Union[str, int, float]]] = Field(
118
+ default=None,
114
119
  description=(
115
- "Source Data Model Object fields as key-value pairs "
116
- "(values can be string or int)"
120
+ "Source Data Model Object fields as key-value pairs. "
121
+ "Values can be string, int, or float."
117
122
  ),
118
123
  examples=[
119
124
  {
120
125
  "FilePath__c": "quarterly_report.pdf",
121
- "Size__c": 1377454,
126
+ "Size__c": 1377454.0,
122
127
  "ContentType__c": "pdf",
123
128
  "LastModified__c": "2026-03-25T02:01:24.918000",
124
129
  }
125
130
  ],
126
131
  )
127
- prepend: List[SearchIndexChunkingV1PrependField] = Field(
128
- default_factory=list, description="List of fields to prepend to each chunk"
132
+ prepend: Optional[List[SearchIndexChunkingV1PrependField]] = Field(
133
+ default=None,
134
+ description=(
135
+ "List of DMO fields whose values are prepended to the chunk "
136
+ "text before indexing"
137
+ ),
138
+ )
139
+ image_base64: Optional[str] = Field(
140
+ default=None,
141
+ description=(
142
+ "Base64-encoded image data associated with this chunk. "
143
+ "Optional — only applicable for image-type document elements."
144
+ ),
145
+ )
146
+ image_mime_type: Optional[str] = Field(
147
+ default=None,
148
+ description=(
149
+ "MIME type of the associated image (e.g., 'image/png', 'image/jpeg'). "
150
+ "Optional — should be provided alongside image_base64 when present."
151
+ ),
152
+ examples=["image/png", "image/jpeg"],
153
+ )
154
+ image_type: Optional[str] = Field(
155
+ default=None,
156
+ description=(
157
+ "Semantic category of the image content"
158
+ "(e.g., 'diagram', 'screenshot', 'chart'). Optional."
159
+ ),
160
+ examples=["diagram", "screenshot"],
129
161
  )
130
162
  model_config = ConfigDict(extra="ignore")
131
163
 
@@ -143,9 +175,12 @@ class SearchIndexChunkingV1DocElement(BaseModel):
143
175
  )
144
176
  ],
145
177
  )
146
- metadata: SearchIndexChunkingV1Metadata = Field(
147
- default_factory=SearchIndexChunkingV1Metadata,
148
- description="Source document metadata",
178
+ metadata: Optional[SearchIndexChunkingV1Metadata] = Field(
179
+ default=None,
180
+ description=(
181
+ "Source document metadata. Optional — may be absent if no "
182
+ "metadata is available for the document element."
183
+ ),
149
184
  )
150
185
  model_config = ConfigDict(extra="ignore")
151
186
 
@@ -159,21 +194,25 @@ class SearchIndexChunkingV1Output(BaseModel):
159
194
  examples=["Online Remittance Instructions"],
160
195
  )
161
196
  seq_no: int = Field(
162
- default=0, description="Sequential chunk number (1-based)", ge=1, examples=[1]
163
- )
164
- chunk_id: str = Field(
165
- default="",
166
- description="Unique identifier for this chunk (UUID format)",
167
- examples=["550e8400-e29b-41d4-a716-446655440000"],
197
+ default=0,
198
+ description=(
199
+ "Sequential order of this chunk within the output "
200
+ "Represents chunk ordering within the source document (1-based)."
201
+ ),
202
+ ge=1,
203
+ examples=[1],
168
204
  )
169
205
  chunk_type: ChunkType = Field(
170
206
  default=ChunkType.TEXT,
171
- description="Type of chunk (e.g., 'text')",
207
+ description="Type of chunk. Fixed value — always 'text'.",
172
208
  examples=["text"],
173
209
  )
174
- citations: Dict[str, str] = Field(
175
- default_factory=dict,
176
- description="Citation information as key-value pairs",
210
+ citations: Optional[Dict[str, str]] = Field(
211
+ default=None,
212
+ description=(
213
+ "Citation metadata associated with this chunk as key-value "
214
+ "pairs. Optional — defaults to None if no citations are present."
215
+ ),
177
216
  examples=[{"source": "quarterly_report.pdf"}],
178
217
  )
179
218
  model_config = ConfigDict(extra="ignore")
@@ -194,4 +233,3 @@ class SearchIndexChunkingV1Response(BaseModel):
194
233
  output: List[SearchIndexChunkingV1Output] = Field(
195
234
  default_factory=list, description="Flat list of chunks from all docs"
196
235
  )
197
- model_config = ConfigDict(extra="ignore")
@@ -21,7 +21,8 @@ from datacustomcode.einstein_predictions.base import EinsteinPredictions
21
21
  from datacustomcode.einstein_predictions_config import einstein_predictions_config
22
22
  from datacustomcode.file.path.default import DefaultFindFilePath
23
23
  from datacustomcode.function.base import BaseRuntime
24
- from datacustomcode.llm_gateway.default import DefaultLLMGateway
24
+ from datacustomcode.llm_gateway.base import LLMGateway
25
+ from datacustomcode.llm_gateway_config import llm_gateway_config
25
26
 
26
27
 
27
28
  class Runtime(BaseRuntime):
@@ -46,7 +47,7 @@ class Runtime(BaseRuntime):
46
47
  raise RuntimeError(
47
48
  "Runtime can only be instantiated once by the SDK.\n\n"
48
49
  "Do not instantiate it yourself. Accept it as a parameter:\n\n"
49
- " from datacustomcode.runtime.function.RunTime import Function\n"
50
+ " from datacustomcode.function.runtime import Runtime\n"
50
51
  " \n"
51
52
  " def function(request: dict, runtime: Runtime) -> dict:\n"
52
53
  " response = {...}\n"
@@ -65,13 +66,19 @@ class Runtime(BaseRuntime):
65
66
  super().__init__()
66
67
 
67
68
  # Initialize resources
68
- self._llm_gateway = DefaultLLMGateway()
69
+ self._llm_gateway: Optional[LLMGateway] = None
69
70
  self._file = DefaultFindFilePath()
70
71
  self._einstein_predictions: Optional[EinsteinPredictions] = None
71
72
 
72
73
  @property
73
- def llm_gateway(self) -> DefaultLLMGateway:
74
- """Access LLM operations."""
74
+ def llm_gateway(self) -> LLMGateway:
75
+ if self._llm_gateway is None:
76
+ if llm_gateway_config.llm_gateway_config is None:
77
+ raise RuntimeError(
78
+ "LLM Gateway is not configured. "
79
+ "Add 'llm_gateway_config' section to config.yaml"
80
+ )
81
+ self._llm_gateway = llm_gateway_config.llm_gateway_config.to_object()
75
82
  return self._llm_gateway
76
83
 
77
84
  @property
@@ -16,6 +16,7 @@
16
16
  """Utilities for inspecting and working with function entrypoints."""
17
17
 
18
18
  import ast
19
+ from enum import Enum
19
20
  import importlib.util
20
21
  import inspect
21
22
  import json
@@ -278,11 +279,17 @@ def _generate_model_sample_data(model_type):
278
279
  # Use examples if available
279
280
  if field_info.examples and len(field_info.examples) > 0:
280
281
  sample_data[field_name] = field_info.examples[0]
281
- # Check if field has a real default value
282
- elif field_info.default is not PydanticUndefined:
282
+ # If field has a non-None, non-empty default value, use it
283
+ elif (
284
+ field_info.default is not PydanticUndefined
285
+ and field_info.default is not None
286
+ and field_info.default != []
287
+ and field_info.default != {}
288
+ ):
283
289
  sample_data[field_name] = field_info.default
290
+ # For all other fields (including default_factory, None defaults,
291
+ # empty defaults), generate sample data
284
292
  else:
285
- # Required field or field without default - generate sample
286
293
  sample_data[field_name] = generate_sample_value(
287
294
  field_info.annotation, field_name
288
295
  )
@@ -301,6 +308,17 @@ def generate_sample_value(field_type, field_name: str):
301
308
  """
302
309
  origin = typing.get_origin(field_type)
303
310
 
311
+ # Handle Optional[T] (Union[T, None]) by unwrapping to T
312
+ if origin is typing.Union:
313
+ non_none_args = [
314
+ arg for arg in typing.get_args(field_type) if arg is not type(None)
315
+ ]
316
+ return (
317
+ generate_sample_value(non_none_args[0], field_name)
318
+ if non_none_args
319
+ else None
320
+ )
321
+
304
322
  if origin is list or field_type is list:
305
323
  args = typing.get_args(field_type)
306
324
  if args:
@@ -320,6 +338,10 @@ def generate_sample_value(field_type, field_name: str):
320
338
  return 1.0
321
339
  elif field_type is bool:
322
340
  return True
341
+ # Handle Enum types
342
+ elif isinstance(field_type, type) and issubclass(field_type, Enum):
343
+ # Return the first enum value
344
+ return next(iter(field_type)).value
323
345
  elif hasattr(field_type, "model_fields"):
324
346
  # Nested Pydantic model - use shared helper
325
347
  return _generate_model_sample_data(field_type)
@@ -23,7 +23,6 @@ from typing import (
23
23
  Union,
24
24
  )
25
25
 
26
- import pandas as pd
27
26
  import requests
28
27
 
29
28
  from datacustomcode.io.reader.base import BaseDataCloudReader
@@ -31,6 +30,7 @@ from datacustomcode.io.reader.utils import _pandas_to_spark_schema
31
30
  from datacustomcode.token_provider import SFCLITokenProvider
32
31
 
33
32
  if TYPE_CHECKING:
33
+ import pandas as pd
34
34
  from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
35
35
  from pyspark.sql.types import AtomicType, StructType
36
36
 
@@ -97,6 +97,8 @@ class SFCLIDataCloudReader(BaseDataCloudReader):
97
97
  Raises:
98
98
  RuntimeError: On HTTP errors or unexpected response shapes.
99
99
  """
100
+ import pandas as pd
101
+
100
102
  access_token, instance_url = self._get_token()
101
103
 
102
104
  url = f"{instance_url}/services/data/{API_VERSION}/ssot/query-sql"
@@ -16,32 +16,32 @@ from __future__ import annotations
16
16
 
17
17
  from typing import TYPE_CHECKING
18
18
 
19
- import pandas.api.types as pd_types
20
- from pyspark.sql.types import (
21
- BooleanType,
22
- DoubleType,
23
- LongType,
24
- StringType,
25
- StructField,
26
- StructType,
27
- TimestampType,
28
- )
29
-
30
19
  if TYPE_CHECKING:
31
20
  import pandas
32
- from pyspark.sql.types import AtomicType
33
-
34
- PANDAS_TYPE_MAPPING = {
35
- "object": StringType(),
36
- "int64": LongType(),
37
- "float64": DoubleType(),
38
- "bool": BooleanType(),
39
- }
21
+ from pyspark.sql.types import AtomicType, StructType
40
22
 
41
23
 
42
24
  def _pandas_to_spark_schema(
43
25
  pandas_df: pandas.DataFrame, nullable: bool = True
44
26
  ) -> StructType:
27
+ import pandas.api.types as pd_types
28
+ from pyspark.sql.types import (
29
+ BooleanType,
30
+ DoubleType,
31
+ LongType,
32
+ StringType,
33
+ StructField,
34
+ StructType,
35
+ TimestampType,
36
+ )
37
+
38
+ PANDAS_TYPE_MAPPING = {
39
+ "object": StringType(),
40
+ "int64": LongType(),
41
+ "float64": DoubleType(),
42
+ "bool": BooleanType(),
43
+ }
44
+
45
45
  fields = []
46
46
  for column, dtype in pandas_df.dtypes.items():
47
47
  spark_type: AtomicType
@@ -13,8 +13,12 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
+ from __future__ import annotations
16
17
 
17
- from pyspark.sql import DataFrame as PySparkDataFrame
18
+ from typing import TYPE_CHECKING
19
+
20
+ if TYPE_CHECKING:
21
+ from pyspark.sql import DataFrame as PySparkDataFrame
18
22
 
19
23
  from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
20
24
 
@@ -13,12 +13,15 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
+ from __future__ import annotations
16
17
 
17
- from typing import Optional
18
+ from typing import TYPE_CHECKING, Optional
18
19
 
19
- from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
20
+ if TYPE_CHECKING:
21
+ from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
22
+
23
+ from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
20
24
 
21
- from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
22
25
  from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
23
26
 
24
27
 
@@ -61,6 +64,8 @@ class PrintDataCloudWriter(BaseDataCloudWriter):
61
64
  sf_cli_org: Optional SF CLI org alias or username. If provided,
62
65
  credentials are fetched via `sf org display`.
63
66
  """
67
+ from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
68
+
64
69
  super().__init__(spark)
65
70
  if reader is None:
66
71
  self.reader = QueryAPIDataCloudReader(
@@ -0,0 +1,22 @@
1
+ # Copyright (c) 2025, Salesforce, Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from datacustomcode.llm_gateway.base import LLMGateway
17
+ from datacustomcode.llm_gateway.default import DefaultLLMGateway
18
+
19
+ __all__ = [
20
+ "DefaultLLMGateway",
21
+ "LLMGateway",
22
+ ]
@@ -72,6 +72,35 @@ class UserExtendableNamedConfigMixin:
72
72
  Args:
73
73
  config_name: should match a subclass's ``CONFIG_NAME``.
74
74
  """
75
+ # First, check if already registered (from __init_subclass__)
76
+ if config_name in UserExtendableNamedConfigMixin._registered_config_names:
77
+ candidate = UserExtendableNamedConfigMixin._registered_config_names[
78
+ config_name
79
+ ]
80
+ # Verify it's actually a subclass of cls (respects hierarchy)
81
+ if candidate is cls or issubclass(candidate, cls):
82
+ return candidate
83
+
84
+ # If not found, try to trigger lazy import via __getattr__
85
+ # This handles the case where subclasses use lazy loading
86
+ try:
87
+ import datacustomcode
88
+
89
+ # Attempt to trigger __getattr__ by accessing the name
90
+ getattr(datacustomcode, config_name, None)
91
+ except (ImportError, AttributeError):
92
+ pass
93
+
94
+ # Check again after potential lazy import
95
+ if config_name in UserExtendableNamedConfigMixin._registered_config_names:
96
+ candidate = UserExtendableNamedConfigMixin._registered_config_names[
97
+ config_name
98
+ ]
99
+ # Verify it's actually a subclass of cls (respects hierarchy)
100
+ if candidate is cls or issubclass(candidate, cls):
101
+ return candidate
102
+
103
+ # Fallback to dynamic lookup (for user-added subclasses)
75
104
  subclass_config_name_map = {}
76
105
  for type_ in _get_all_subclass_descendants(cls):
77
106
  if name := getattr(type_, "CONFIG_NAME", ""):
@@ -25,5 +25,5 @@ if TYPE_CHECKING:
25
25
 
26
26
 
27
27
  class BaseSparkSessionProvider(UserExtendableNamedConfigMixin):
28
- def get_session(self, spark_config: SparkConfig) -> "SparkSession":
28
+ def get_session(self, spark_config: SparkConfig) -> SparkSession:
29
29
  raise NotImplementedError
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
27
27
  class DefaultSparkSessionProvider(BaseSparkSessionProvider):
28
28
  CONFIG_NAME = "DefaultSparkSessionProvider"
29
29
 
30
- def get_session(self, spark_config: SparkConfig) -> "SparkSession":
30
+ def get_session(self, spark_config: SparkConfig) -> SparkSession:
31
31
  from pyspark.sql import SparkSession
32
32
 
33
33
  builder = SparkSession.builder
@@ -1,5 +1,4 @@
1
1
  import logging
2
- import uuid
3
2
 
4
3
  from datacustomcode.function import Runtime
5
4
  from datacustomcode.function.feature_types.chunking import (
@@ -124,12 +123,11 @@ def function(
124
123
  for chunk_text in text_chunks:
125
124
  # Create citations from source_dmo_fields if available
126
125
  citations = {}
127
- if metadata.source_dmo_fields:
126
+ if metadata and metadata.source_dmo_fields:
128
127
  for key, value in metadata.source_dmo_fields.items():
129
128
  citations[key] = str(value)
130
129
 
131
130
  chunk_output = SearchIndexChunkingV1Output(
132
- chunk_id=str(uuid.uuid4()),
133
131
  chunk_type=ChunkType.TEXT,
134
132
  text=chunk_text.strip(),
135
133
  seq_no=seq_no,
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Sample Search Index Chunking Customer Function
4
+
5
+ This function demonstrates the new signature-based invocation with Pydantic models:
6
+ - Uses SearchIndexChunkingV1Request/Response (Pydantic models)
7
+ - Requires Runtime parameter (for agentic capabilities)
8
+ - Type-safe with direct field access (no wrappers)
9
+ - Automatic validation and conversion
10
+ """
11
+
12
+ import logging
13
+
14
+ from datacustomcode.function.feature_types.chunking import (
15
+ ChunkType,
16
+ SearchIndexChunkingV1Output,
17
+ SearchIndexChunkingV1Request,
18
+ SearchIndexChunkingV1Response,
19
+ )
20
+ from datacustomcode.function.runtime import Runtime
21
+ from datacustomcode.llm_gateway.types.generate_text_request_builder import (
22
+ GenerateTextRequestBuilder,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+ logging.basicConfig(level=logging.INFO)
27
+
28
+
29
# Template text already read from disk, keyed by the resolved prompt file path.
_PROMPT_TEMPLATE_CACHE = {}


def _load_prompt_template(runtime: Runtime) -> str:
    """Return the chunking prompt template, reading each file at most once.

    The template location is resolved through ``runtime.file.find_file_path``.
    The file contents are cached per resolved path, so repeated invocations
    do not hit the filesystem again.

    Args:
        runtime: Runtime instance whose ``file`` helper locates
            ``chunking_prompt.txt``.

    Returns:
        The full text of the prompt template.

    Raises:
        OSError: If the resolved file cannot be opened or read.
    """
    prompt_file = runtime.file.find_file_path("chunking_prompt.txt")
    cache_key = str(prompt_file)

    cached_template = _PROMPT_TEMPLATE_CACHE.get(cache_key)
    if cached_template is not None:
        return cached_template

    # Explicit encoding keeps decoding independent of the platform locale.
    with open(prompt_file, "r", encoding="utf-8") as f:
        template = f.read()

    _PROMPT_TEMPLATE_CACHE[cache_key] = template
    logger.info(f"Loaded prompt template from {prompt_file}")
    return template
36
+
37
+
38
def function(
    request: SearchIndexChunkingV1Request, runtime: Runtime
) -> SearchIndexChunkingV1Response:
    """
    Split every input document into chunks using the LLM gateway.

    Args:
        request: SearchIndexChunkingV1Request carrying the input documents
        runtime: Runtime instance; used to load the prompt template and to
            reach the LLM gateway that produces the chunks

    Returns:
        SearchIndexChunkingV1Response holding all chunks, with ``seq_no``
        running consecutively (starting at 1) across the whole request

    Raises:
        RuntimeError: if the LLM gateway reports an unsuccessful generation
    """
    logger.info(f"Received {len(request.input)} documents to chunk")

    # Prompt template text used to build one LLM request per document
    prompt_template = _load_prompt_template(runtime)

    outputs = []

    for doc_number, doc in enumerate(request.input, start=1):
        # Fill the document text into the template
        prompt = prompt_template.format(text=doc.text)

        request_builder = GenerateTextRequestBuilder()
        llm_request = (
            request_builder.set_model("sfdc_ai__DefaultGPT4Turbo")
            .set_prompt(prompt)
            .build()
        )
        response = runtime.llm_gateway.generate_text(llm_request)

        if not response.is_success:
            # A failed generation aborts the whole request
            error_msg = (
                f"LLM chunking failed for document {doc_number}: {response.error_code}"
            )
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        # The LLM separates chunks with a marker; drop blank fragments
        stripped_parts = (part.strip() for part in response.text.split("---CHUNK---"))
        pieces = [part for part in stripped_parts if part]

        # Sequence numbers continue from the chunks already collected
        next_seq_no = len(outputs) + 1
        outputs.extend(
            [
                SearchIndexChunkingV1Output(
                    text=piece,
                    seq_no=seq_no,
                    chunk_type=ChunkType.TEXT,
                    citations={},
                )
                for seq_no, piece in enumerate(pieces, start=next_seq_no)
            ]
        )

    # Wrap the collected chunks in the Pydantic response model
    return SearchIndexChunkingV1Response(output=outputs)
@@ -0,0 +1,19 @@
1
+ Analyze this document and break it into logical chunks for search/retrieval.
2
+
3
+ Rules:
4
+ 1. Each chunk should be 150-300 words
5
+ 2. Break at semantic/topic boundaries (not mid-sentence)
6
+ 3. Each chunk should be self-contained (understandable alone)
7
+ 4. Preserve important context in each chunk
8
+
9
+ Format your response as chunks separated by "---CHUNK---" markers.
10
+
11
+ Document:
12
+ {text}
13
+
14
+ Output format:
15
+ <chunk 1 text>
16
+ ---CHUNK---
17
+ <chunk 2 text>
18
+ ---CHUNK---
19
+ ...
@@ -0,0 +1,51 @@
1
+ {
2
+ "input": [
3
+ {
4
+ "text": "Employee Stock Ownership Plan (ESOP) Guide\n\nWhat is an ESOP?\n\nAn Employee Stock Ownership Plan (ESOP) is a qualified retirement plan that invests primarily in the stock of the sponsoring employer. ESOPs are designed to provide employees with an ownership interest in the company, aligning their interests with those of shareholders. As the company grows and prospers, so does the value of the ESOP shares held by employees.\n\nHow ESOPs Work\n\nWhen you join a company with an ESOP, you become eligible to participate after meeting certain requirements, typically one year of service. The company makes contributions to your ESOP account, usually as a percentage of your compensation. These contributions are made in the form of company stock or cash that is used to purchase company stock. The shares are held in a trust account in your name.\n\nVesting Schedule and Ownership\n\nYour ownership of ESOP shares typically follows a vesting schedule. A common vesting schedule is 20% per year over five years, meaning you become fully vested after five years of service. Once vested, those shares belong to you even if you leave the company. If you leave before becoming fully vested, you forfeit the unvested portion.\n\nDistribution Rules and Tax Implications\n\nWhen you leave the company, retire, or meet other distribution trigger events, you are entitled to receive the value of your vested ESOP shares. Distributions typically begin in the year following your separation from service. You can choose to receive distributions in a lump sum or in installments over several years. The tax treatment depends on how you receive the distribution - rolling over to an IRA defers taxes, while direct distributions are taxed as ordinary income.\n\nSelling Your ESOP Shares\n\nFor privately held companies, the ESOP trust or the company itself typically repurchases your shares at fair market value, determined by an independent appraiser. 
For publicly traded companies, shares may be sold on the open market. The repurchase obligation ensures you can convert your ownership stake to cash when you leave the company, providing liquidity for what might otherwise be an illiquid investment.",
5
+ "metadata": {
6
+ "type": "text",
7
+ "page_number": 1,
8
+ "text_as_html": null,
9
+ "source_dmo_fields": {
10
+ "FilePath__c": "employee_handbook/esop_guide.pdf",
11
+ "Size__c": 2847521,
12
+ "ContentType__c": "pdf",
13
+ "LastModified__c": "2026-04-15T08:23:11.442000"
14
+ },
15
+ "prepend": [
16
+ {
17
+ "dmo_name": "udmo_1__dlm",
18
+ "field_name": "ResolvedFilePath__c",
19
+ "value": "udlo_1__dll:employee_handbook/esop_guide.pdf"
20
+ }
21
+ ]
22
+ }
23
+ },
24
+ {
25
+ "text": "Data Privacy and Security Policy\n\nIntroduction and Scope\n\nThis policy establishes the framework for protecting confidential and sensitive information within our organization. It applies to all employees, contractors, consultants, and third parties who have access to company systems or data. The policy covers all forms of information, whether stored electronically, on paper, or transmitted verbally. Compliance with this policy is mandatory and violations may result in disciplinary action up to and including termination of employment.\n\nData Classification Standards\n\nAll company data must be classified according to sensitivity level. Public data can be freely shared without risk to the organization. Internal data is intended for employees only and should not be shared externally without approval. Confidential data includes business plans, financial records, and employee information that could cause significant harm if disclosed. Restricted data includes trade secrets, personal identifiable information (PII), and regulated data that must comply with specific legal requirements like GDPR, HIPAA, or CCPA.\n\nAccess Control and Authentication Requirements\n\nAccess to company systems and data is granted on a need-to-know basis following the principle of least privilege. All users must authenticate using strong passwords that meet complexity requirements: minimum 12 characters, including uppercase, lowercase, numbers, and special characters. Multi-factor authentication (MFA) is required for all remote access and privileged accounts. Passwords must be changed every 90 days and cannot reuse the previous 12 passwords. Sharing of credentials is strictly prohibited.\n\nData Handling and Transmission Security\n\nWhen transmitting confidential or restricted data, encryption must be used. Email containing sensitive information should be encrypted using approved tools. File transfers must use secure protocols like SFTP or HTTPS. 
Physical documents containing sensitive information must be stored in locked cabinets when not in use and shredded when no longer needed. Laptops and mobile devices must use full-disk encryption and automatic screen locking after 5 minutes of inactivity.\n\nIncident Response and Reporting Obligations\n\nAny suspected or actual security incident must be reported immediately to the Information Security team. Incidents include unauthorized access attempts, malware infections, lost or stolen devices, or accidental disclosure of sensitive information. Do not attempt to investigate or remediate security incidents yourself. The Security team will coordinate the response, including containment, investigation, remediation, and required notifications. For data breaches involving PII, regulatory notification requirements may apply within 72 hours of discovery.",
26
+ "metadata": {
27
+ "type": "text",
28
+ "page_number": 1,
29
+ "source_dmo_fields": {
30
+ "FilePath__c": "policies/data_privacy_security.pdf",
31
+ "Size__c": 1923456,
32
+ "ContentType__c": "pdf",
33
+ "LastModified__c": "2026-03-01T14:52:33.127000"
34
+ }
35
+ }
36
+ },
37
+ {
38
+ "text": "Product Launch Strategy: CloudSync Pro Q2 2026\n\nExecutive Summary\n\nCloudSync Pro represents our entry into the enterprise data synchronization market, targeting organizations with hybrid cloud infrastructures. Our research indicates strong demand for real-time data replication across on-premise and cloud environments. The total addressable market is estimated at $4.2B globally, growing at 23% annually. We aim to capture 3% market share within 18 months, generating $126M in annual recurring revenue. This launch is critical to our strategic objective of expanding beyond SMB customers into enterprise accounts.\n\nTarget Customer Profile and Pain Points\n\nOur primary target is IT Directors and Cloud Architects at mid-to-large enterprises (1000+ employees) operating hybrid infrastructure. These customers struggle with data consistency across distributed systems, experiencing latency issues, sync failures, and compliance challenges. Current solutions require significant custom development and ongoing maintenance. Our research shows customers spend an average of $340K annually on data integration tools and engineering resources. They need a solution that reduces integration complexity while providing real-time synchronization guarantees.\n\nCompetitive Landscape and Differentiation\n\nThe market leaders are DataSync Enterprise (32% share) and ReplicaCloud (28% share), both offering batch-oriented synchronization with 5-15 minute latencies. Our key differentiator is true real-time replication with sub-second latency using change data capture (CDC) technology. Additionally, our pricing model is consumption-based ($0.02 per GB transferred) versus competitors' per-connector licensing ($5K-15K per connector annually). This makes CloudSync Pro 40% more cost-effective for high-volume use cases while eliminating the capacity planning burden.\n\nGo-to-Market Strategy and Channel Plan\n\nWe will launch through a hybrid direct and partner model. 
Direct sales will target Fortune 2000 accounts through our existing enterprise sales team, augmented with three new cloud specialist hires. Partner channels include cloud marketplaces (AWS, Azure, GCP) and system integrators. We are finalizing partnerships with Deloitte and Accenture to include CloudSync Pro in their cloud migration practices. Marketing will emphasize analyst relations (targeting Gartner Magic Quadrant inclusion), thought leadership content, and targeted account-based marketing campaigns.\n\nRevenue Model and Financial Projections\n\nPricing is based on data volume transferred: $0.02/GB for the first 100TB monthly, $0.015/GB for 100-500TB, and $0.01/GB above 500TB. Average customer is projected at 250TB monthly ($4,375 MRR, $52,500 ARR). We forecast 100 customers by end of Q4 2026, 300 by end of 2027. Year 1 revenue target is $2.1M, scaling to $15.6M in Year 2 and $47M in Year 3. Gross margins are expected at 78% at scale. Initial investment includes $3.2M in product development (already completed), $2.8M in go-to-market expenses, and $1.5M in infrastructure scaling over 18 months.",
39
+ "metadata": {
40
+ "type": "text",
41
+ "page_number": 1,
42
+ "source_dmo_fields": {
43
+ "FilePath__c": "product_strategy/cloudsync_launch_plan.pdf",
44
+ "Size__c": 3156789,
45
+ "ContentType__c": "pdf",
46
+ "LastModified__c": "2026-04-28T11:18:47.893000"
47
+ }
48
+ }
49
+ }
50
+ ]
51
+ }
@@ -1,27 +0,0 @@
1
- # Copyright (c) 2025, Salesforce, Inc.
2
- # SPDX-License-Identifier: Apache-2
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- from datacustomcode.client import Client
17
- from datacustomcode.credentials import AuthType, Credentials
18
- from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader
19
- from datacustomcode.io.writer.print import PrintDataCloudWriter
20
-
21
- __all__ = [
22
- "AuthType",
23
- "Client",
24
- "Credentials",
25
- "PrintDataCloudWriter",
26
- "QueryAPIDataCloudReader",
27
- ]
@@ -1,14 +0,0 @@
1
- # Copyright (c) 2025, Salesforce, Inc.
2
- # SPDX-License-Identifier: Apache-2
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.