clelandlab-HAL 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ from google.genai import types
2
+ from . import memory
3
+ from .HAL_gather_document import gather_document
4
+ from .utils import add_generative_cost, docs2text, sequence2text
5
+ from .display import log
6
+
7
def answer(prompt, sequence):
    """Answer a question in natural language.

    Documents are gathered for ``prompt`` and placed in the system
    instruction; the step history ``sequence`` is sent as context next to
    the question. The cost of the call is added to the session.

    Returns:
        The ``text`` attribute of the model response.
    """
    documents = gather_document(prompt)
    instruction = (
        "You are a researcher on experimental quantum computing. Answer the question concisely with NO comments. "
        "Use the provided context and the following documents (you might refer to document title, but NOT document number):\n\n"
        + docs2text(documents)
    )
    model_name = memory.session.get("model", "gemini-flash-latest")
    log(f"[HAL] Answering ({model_name})...", "Answering")
    question = f"Context:\n\n{sequence2text(sequence)}\n\nQuestion:\n\n{prompt}"
    response = memory.client.models.generate_content(
        model=model_name,
        config=types.GenerateContentConfig(system_instruction=instruction),
        contents=question,
    )
    add_generative_cost(response)
    return response.text
@@ -0,0 +1,61 @@
1
+ from google.genai import types
2
+ import json
3
+ from . import memory
4
+ from .HAL_gather_document import gather_document
5
+ from .utils import add_generative_cost, docs2text, state_type2text, get_exec_import
6
+ from .display import log
7
+
8
+ system_instruction = lambda docs, import_variable, STATE: f"""You are a world class programming AI that generates Python code based on requirements. Write clear and concise code using the given documents.
9
+
10
+ # Coding Guidelines
11
+
12
+ The code should be runnable. Absolutely NO comments, NO explanations, NO side behaviors like printing messages. Do NOT use try-except to wrap all the code, it is taken care of by the caller. If any user input is really necessary (e.g. missing data directory), specify them in `request_input`, which should be a code snippet that assigns values to variables in `STATE`. It will be modified by the user to input the necessary values.
13
+
14
+
15
+ In addition to all the imported packages below, you have two global variables: `STATE` and `INVOKE`:
16
+ 1. `STATE` is a dictionary that persists across steps. Use it to store any variables or data that need to be retained or exported. Note that you cannot assign to `STATE`, you can only modify its contents.
17
+ - `STATE["SIGNAL"]` is a special variable for signal. SIGNAL should be a short string in natural language, describing the key outcome of the code execution. If there is no signal description in prompt, set it to "SUCCESS" or a descriptive error message.
18
+ 2. `INVOKE` is a function that can be used to directly run other code segments or steps. `INVOKE("Code Segment [ID]")` can invoke a code segment in documents. When possible, you should use `INVOKE` instead of repeating code segments in documents.
19
+ - Sometimes you may be instructed to invoke a number, e.g., `INVOKE(3)`, when the manager decides to run a previous step. Faithfully follow the instruction to invoke the specified step.
20
+
21
+ ## Existing Variables in STATE
22
+
23
+ Take the following variables as given. Do NOT check or request user input! Not every variable is relevant to your task. Only use the specified or relevant variables.
24
+
25
+ {state_type2text(STATE)}
26
+
27
+ ## Documents
28
+
29
+ {docs2text(docs)}
30
+
31
+ ## Imports
32
+
33
+ The following packages are already imported and ready to use. Do NOT import these packages again!
34
+
35
+ ```python
36
+ {get_exec_import(import_variable)}
37
+ ```"""
38
+
39
def code(prompt, import_variable={ "name": "HAL" }, _doc={}):
    """Ask the model to write Python code for ``prompt``.

    Args:
        prompt: Task description sent as the request contents.
        import_variable: Variables used to render the import header shown to
            the model.
        _doc: Dict that receives the ids of the gathered documents under the
            key ``"code"``.

    Returns:
        ``(code, request_input)``; ``request_input`` is ``None`` when the
        model omitted it or returned an empty string.
    """
    documents = gather_document(prompt)
    _doc["code"] = [document["id"] for document in documents]
    model_name = memory.session.get("model", "gemini-flash-latest")
    log(f"[HAL] Coding ({model_name})...", "Coding")
    reply_schema = types.Schema(
        type=types.Type.OBJECT,
        required=["code"],
        properties={
            "code": types.Schema(type=types.Type.STRING),
            "request_input": types.Schema(type=types.Type.STRING, description="some lines of code assigning values to variables in STATE. ONLY assignment statements are allowed. This will be modified by the user."),
        },
    )
    generation_config = types.GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=reply_schema,
        system_instruction=system_instruction(documents, import_variable, memory.session["STATE"]),
    )
    response = memory.client.models.generate_content(
        model=model_name,
        config=generation_config,
        contents=prompt,
    )
    add_generative_cost(response)
    payload = json.loads(response.text)
    requested = payload.get("request_input")
    if requested == "":
        requested = None
    return payload["code"], requested
@@ -0,0 +1,66 @@
1
+ from google.genai import types
2
+ from . import memory
3
+ import json
4
+ from .utils import docs2text, add_generative_cost
5
+ from .display import log
6
+
7
+ system_instruction = f"""You are a researcher preparing documents for a coming task. Your goal is to gather all relevant documents from the database. You will be shown the main task, a list of queries already searched, and the documents gathered so far.
8
+
9
+ Your task is to:
10
+ 1. **Filter:** Review all gathered documents. Identify the documents that are completely irrelevant or useless for the task. List their *indices* in the "remove" key.
11
+ 2. **Stop:** If gathered documents are sufficient or relevant queries are already searched, provide an empty list for "query". You must provide search queries if all documents are removed in the previous step.
12
+ 3. **Search:** Review the task and the *relevant* documents. Provide new search queries to find missing information or to recursively find documents/tools/methods mentioned in the relevant documents. Unless the task is to make a plan, you MUST search for ALL things refered by current documents. **Do NOT search for methods in common Python packages like "scipy", "numpy", "matplotlib", "yaml", etc.**
13
+ """
14
+
15
+ user_content = lambda task, docs, query_section: f"""# Task:
16
+
17
+ {task}
18
+
19
+ # Searched Queries (do NOT repeat these):
20
+
21
+ {query_section}
22
+
23
+ # Gathered Documents:
24
+
25
+ {docs2text(docs)}"""
26
+
27
def filter_docs(indices_to_remove, doc_id_list):
    """Return ``doc_id_list`` without the entries at the given positions.

    Args:
        indices_to_remove: Iterable of integer positions to drop; positions
            outside the list are ignored. ``None`` is treated as empty.
        doc_id_list: Document ids in their current order.

    Returns:
        A new list with the remaining ids in their original order.
    """
    # Build the lookup set once instead of once per element.
    removed = set(indices_to_remove or ())
    return [doc_id for index, doc_id in enumerate(doc_id_list) if index not in removed]
28
+
29
def gather_document(query, max_iterations=5):
    """Collect documents for ``query`` through repeated model-guided search.

    After an initial search, each round shows the model the task, the queries
    searched so far and the gathered documents. The model replies with
    indices to drop and new queries to run. The loop ends when no new query
    is returned or after ``max_iterations`` rounds.

    Returns:
        The gathered documents, each as returned by ``memory.get``.
    """
    log("[HAL] Gathering documents...", "Gathering Documents")
    doc_ids = []

    def search(keyword):
        # Append unseen hits to the current id list; report the hit count.
        hits = memory.search(keyword)
        for hit_id, _score in hits:
            if hit_id not in doc_ids:
                doc_ids.append(hit_id)
        return len(hits)

    searched_queries = []
    search(query)
    for round_index in range(max_iterations):
        gathered = (memory.get(doc_id) for doc_id in doc_ids)
        query_section = '\n'.join(f"- {q}" for q in searched_queries)
        reply_schema = types.Schema(
            type=types.Type.OBJECT,
            required=["remove", "query"],
            properties={
                "remove": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.INTEGER)),
                "query": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING)),
            },
        )
        config = types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_level="LOW"),
            response_mime_type="application/json",
            response_schema=reply_schema,
            system_instruction=system_instruction,
        )
        response = memory.client.models.generate_content(
            model="gemini-flash-latest",
            contents=user_content(query, gathered, query_section),
            config=config,
        )
        add_generative_cost(response)
        decision = json.loads(response.text)
        doc_ids = filter_docs(decision.get("remove", []), doc_ids)
        new_queries = decision.get("query", [])
        for new_query in new_queries:
            search(new_query)
            searched_queries.append(new_query)
        log(f" {round_index}. search: {new_queries} -> {len(doc_ids)}", "Gathering Documents")
        if not new_queries:
            break
    log(f" > doc count: {len(doc_ids)}")
    return list(map(memory.get, doc_ids))
@@ -0,0 +1,43 @@
1
+ from google.genai import types
2
+ import json
3
+ from . import memory
4
+ from .HAL_gather_document import gather_document
5
+ from .utils import add_generative_cost, docs2text, sequence2text, state_type2text
6
+ from .display import log
7
+
8
+ system_instruction = lambda docs: f"""You are a research manager leading a team. Given the step history, make a concise plan for the next step.
9
+
10
+ Your team members can access all the documents, but NOT the step history. Make sure to provide sufficient details in the prompt to make your team members work without the step history, like the detailed information from user input.
11
+
12
+ Do NOT repeat document content in the prompt. Specify relevant documents title or keyword so that your team members can search for them. It's not recommended to refer to plan documents unless necessary. Do NOT use document indices, they are not accessible to your team members.
13
+
14
+ SIGNAL is a special string variable that describes the key outcome of the step. It can include some critical numbers like goodness of fitting, or short messages like "SUCCESS" or error. Provide a SIGNAL description in your prompt, so that your team can present the result to you.
15
+
16
+ If you want to repeat a step, execute a step for multiple times, or execute a code segment, ask your team to INVOKE it by its step index or the code segment name in the prompt.
17
+
18
+ If the task requested by the user is completed, set the step type to "end" and output empty prompt.
19
+
20
+ You may literally use an existing plan, with modification or added information. Refer to the following documents to make the plan:
21
+
22
+ {docs2text(docs)}"""
23
+
24
def plan(sequence, _doc={}):
    """Ask the model for the next step given the step history.

    Args:
        sequence: Step history; rendered with ``sequence2text``.
        _doc: Dict that receives the ids of the gathered documents under the
            key ``"plan"``.

    Returns:
        The decoded JSON reply; the response schema requires the keys
        ``"type"`` and ``"prompt"``.
    """
    history_text = sequence2text(sequence)
    documents = gather_document(f'**Make a plan for the next step. ONLY find a few relevant high-level plan documents. All implementation details are irrelevant.**\n\nStep history:\n\n{history_text}')
    _doc["plan"] = [document["id"] for document in documents]
    model_name = memory.session.get("model", "gemini-flash-latest")
    log(f"[HAL] Planning ({model_name})...", "Planning")
    step_schema = types.Schema(
        type=types.Type.OBJECT,
        required=["type", "prompt"],
        properties={
            "type": types.Schema(type=types.Type.STRING, description="Type of the next step, one of: 'code', 'end'."),
            "prompt": types.Schema(type=types.Type.STRING, description="Prompt for your team to complete the step, as a prompt for a large language model."),
        },
    )
    generation_config = types.GenerateContentConfig(
        system_instruction=system_instruction(documents),
        response_mime_type="application/json",
        response_schema=step_schema,
    )
    state_text = state_type2text(memory.session['STATE'])
    response = memory.client.models.generate_content(
        model=model_name,
        config=generation_config,
        contents=f"# Step history:\n\n{history_text}\n\n # Current STATE variables:\n\n{state_text}",
    )
    add_generative_cost(response)
    return json.loads(response.text)
@@ -0,0 +1,28 @@
1
+ from google.genai import types
2
+ import json
3
+ from . import memory
4
+ from .utils import add_generative_cost
5
+ from .display import log
6
+
7
+ system_instruction = '''You are an expert at classifying user prompts into categories. Given a user prompt, classify it into one of the following categories:
8
+
9
+ - "query": if the prompt is asking for an answer, explanation, or natural language response, like **how to** write a function, why something is wrong, or to write a document.
10
+ - "action": if the prompt is requesting to perform a task or action, like take a measurement or run a data analysis or fix something.
11
+ '''
12
+
13
def sort(prompt):
    """Classify a user prompt with the model.

    The system instruction asks for one of the categories "query" or
    "action". The value is returned as the model produced it and is not
    validated here.

    Returns:
        The ``"category"`` string from the model's JSON reply.
    """
    log("[HAL] Sorting...", "Sorting")
    config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_level="LOW"),
        system_instruction=system_instruction,
        response_mime_type="application/json",
        response_schema=types.Schema(type=types.Type.OBJECT, required=["category"], properties={ "category": types.Schema(type=types.Type.STRING) })
    )
    res = memory.client.models.generate_content(
        model="gemini-flash-latest",
        config=config,
        contents=prompt
    )
    # Record the cost of this call, as every other generation call in the
    # package does; without it the session total omits sorting.
    add_generative_cost(res)
    category = json.loads(res.text)["category"]
    log(f" > {category}")
    return category
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,175 @@
1
+ import sys, os, json, random, string, time
2
+ from google import genai
3
+ import ipywidgets as widgets
4
+ from IPython.display import display as _display
5
+ from . import memory, utils, display, run
6
+ from .HAL_gather_document import gather_document
7
+ from .HAL_sort import sort
8
+ from .HAL_plan import plan
9
+ from .HAL_answer import answer
10
+ from .HAL_code import code
11
+
12
+ handlers = {}
13
+
14
def HAL(query=None):
    """Run the HAL loop: classify optional user input, then plan and handle steps.

    Each pass decrements ``HAL.auto``, moves a pending ``STATE["SIGNAL"]``
    onto the last step, plans the next step and passes it to the handler
    registered for its type. The loop returns when the handler asks to pause,
    when ``HAL.auto`` is used up, or when the planned step type is "end".

    Args:
        query: Optional user text. Text classified as "query" by ``sort`` is
            answered directly; any other text is appended to the sequence as
            a "user input" step before planning.

    Returns:
        The value returned by ``display.show`` for the answer or status text.
    """
    if query is not None and "open the pod bay doors" in query.casefold():
        return display.show("I'm sorry, Dave. I'm afraid I can't do that.")
    while True:
        sequence = memory.session["sequence"]
        HAL.auto -= 1
        state = memory.session["STATE"]
        if "SIGNAL" in state:
            signal = state.pop("SIGNAL")
            # With no step to attach it to, the stale signal is dropped
            # instead of raising IndexError on sequence[-1].
            if sequence:
                sequence[-1]["SIGNAL"] = signal
        original_cost = memory.session.get("cost", 0)
        log_cost = lambda: display.log(f"[HAL] Cost: ${memory.session.get('cost', 0)-original_cost:.5f}. (Session Total: ${memory.session.get('cost', 0):.5f})\n")
        start_time = time.time()
        if query is not None:
            category = sort(query)
            if category == "query":
                res = answer(query, sequence)
                log_cost()
                return display.show(res)
            sequence.append({ "user input": query, "_type": "user input" })
        if len(sequence) == 0:
            return display.show("HAL is ready.")
        if sequence[-1].get("_type", "") == "end":
            return display.show("HAL session has ended. Please reset the session using `HAL.reset()`.")
        display.sequence(sequence)
        step = { "_doc": {} }
        res = plan(sequence, _doc=step["_doc"])
        step["_type"], step["prompt"] = res["type"], res["prompt"]
        display.log(f" > {step['_type']}")
        sequence.append(step)
        display.sequence(sequence)
        pause = handlers[step["_type"]](step)
        display.log(f"[HAL] Step time: {time.time()-start_time:.2f} s")
        log_cost()
        display.sequence(sequence)
        # The user input is consumed by the first pass only.
        query = None
        if pause:
            return display.show("HAL sequence is paused.")
        if HAL.auto <= 0 or step["_type"] == "end":
            HAL.auto = 0
            return display.show("HAL auto is stopped.")
54
+
55
+ sys.modules[__name__] = HAL
56
+
57
+ HAL.auto = 0
58
+ HAL.session = memory.session
59
+
60
+ HAL.memory = memory
61
+ HAL.display = display
62
+ HAL.utils = utils
63
+
64
+ HAL.gather_document = gather_document
65
+ HAL.sort = sort
66
+ HAL.plan = plan
67
+ HAL.answer = answer
68
+ HAL.code = code
69
+
70
def _invoke(name=None, import_variable={}):
    """Call ``run.invoke`` with the session values overlaid by ``import_variable``."""
    merged = dict(memory.session)
    merged.update(import_variable)
    return run.invoke(name, import_variable=merged)
71
+
72
def _export_ctx():
    """Publish ``STATE`` and ``INVOKE`` in ``__main__`` and refresh ``HAL.session``."""
    current_session = memory.session
    main_module = sys.modules.get('__main__')
    setattr(main_module, "STATE", current_session["STATE"])
    setattr(main_module, "INVOKE", _invoke)
    HAL.session = current_session
77
+
78
def _init(name="", _config=None):
    """Initialise HAL: load configuration, create the client, set up display and memory.

    Args:
        name: Stored as the session name.
        _config: Either a dict merged into ``utils.config``, or a path to a
            JSON file whose content is merged. ``None`` selects
            ``../config.json`` relative to this file.
    """
    memory.session["name"] = name
    if _config is None:
        _config = os.path.join(os.path.dirname(__file__), "../config.json")
    if isinstance(_config, dict):
        utils.config.update(_config)
    if isinstance(_config, str):
        # Close the config file deterministically instead of leaking the handle.
        with open(_config, "r") as config_file:
            utils.config.update(json.load(config_file))
    memory.client = genai.Client(api_key=utils.config["GEMINI_API_KEY"])
    display.init()
    memory.load()
    display.log("[HAL] Initialized.")
    HAL.reset()
91
+ HAL.init = _init
92
+
93
def _reset():
    """Clear cost, sequence and STATE of the session, then re-export the context."""
    fresh_values = { "cost": 0.0, "sequence": [], "STATE": {} }
    memory.session.update(fresh_values)
    display.log("[HAL] Session reset.")
    display.sequence(memory.session["sequence"])
    _export_ctx()
98
+ HAL.reset = _reset
99
+
100
def _save(path="session.json"):
    """Write the session to ``path`` as indented JSON.

    Raises whatever ``open`` or ``json.dump`` raise, for example ``TypeError``
    when the session holds values JSON cannot encode.
    """
    with open(path, "w") as session_file:
        json.dump(memory.session, session_file, indent=2)
    # Log only after the write succeeded.
    display.log(f"[HAL] Session saved to {path}")
103
+ HAL.save = _save
104
+
105
def _load(path="session.json"):
    """Merge the JSON session stored at ``path`` into the current session.

    The sequence display is refreshed and the context re-exported afterwards.
    """
    with open(path, "r") as session_file:
        memory.session.update(json.load(session_file))
    # Log only after the file was read and merged.
    display.log(f"[HAL] Session loaded from {path}")
    display.sequence(memory.session.get("sequence", []))
    _export_ctx()
110
+ HAL.load = _load
111
+
112
def _search(*args, **kwargs):
    """Search memory and show the hits in the document browser.

    All arguments are forwarded to ``memory.search``. Each hit is rendered as
    markdown: id, score, every metadata field except id/content/embedding,
    then the content.

    Returns:
        The value returned by ``display.docs``.
    """
    hits = memory.search(*args, **kwargs)
    texts = []
    for doc_id, score in hits:
        doc = memory.get(doc_id)
        # Single quotes inside the f-string: reusing the outer quote type is
        # a SyntaxError before Python 3.12.
        parts = [f"### `{doc['id']}`\n- **score**: {score}\n"]
        parts.extend(
            f"- **{k}**: {v}\n"
            for k, v in doc.items()
            if k not in ("id", "content", "embedding")
        )
        parts.append(f"\n\n{doc['content']}\n\n")
        texts.append("".join(parts))
    return display.docs(texts)
125
+ HAL.search = _search
126
+
127
def _memorize(content=None, meta={}):
    """Store a text or a previous code step in memory.

    Args:
        content: A string is stored as-is. An int selects a step of the
            session sequence, stored as an invokable "Code Segment";
            ``None`` selects the last step.
        meta: Metadata stored with the document; ``"source"`` defaults to the
            session name. The passed dict is not modified.

    Returns:
        The value returned by ``memory.add``, or ``None`` after printing an
        error message.
    """
    if memory.session["name"] == "HAL":
        return print("[HAL] Error: Please set name to memorize")
    # Work on a copy: mutating the shared default dict leaked "invoke" and
    # "source" from one call into the next.
    meta = dict(meta)
    meta.setdefault("source", memory.session["name"])
    if isinstance(content, str):
        return memory.add(content, meta)
    if content is None:
        content = len(memory.session.get("sequence", [])) - 1
    if isinstance(content, int):
        seq = memory.session.get("sequence", [])
        if content < 0 or content >= len(seq):
            return print(f"[HAL] Error: Invalid sequence [{content}]")
        step = seq[content]
        if "prompt" not in step or "_code" not in step:
            return print(f"[HAL] Error: Sequence [{content}] does not contain a valid step to memorize")
        # Segment name: two random capitals followed by the current document count.
        n = "".join(random.choices(string.ascii_uppercase, k=2)) + str(len(memory.data))
        c = f"# Code Segment {n}:\n\n## Prompt:\n\n{step['prompt']}\n\n## Code:\n\nYou can directly run the following code by calling `INVOKE('Code Segment {n}')`\n\n```python\n{step['_code']}\n```"
        meta["invoke"] = 1
        return memory.add(c, meta)
    return print("[HAL] Error: Unsupported content type for memorize")
148
+ HAL.memorize = _memorize
149
+
150
def end_handler(step):
    """Handle an "end" step: label it, announce the end, and never pause."""
    step.update({"prompt": "Session ended."})
    display.show("[HAL] session ended.")
    return False
154
+ handlers["end"] = end_handler
155
+
156
def code_handler(step):
    """Handle a "code" step: generate code, then run it or hand it to the user.

    The generated code is stored in ``step["_code"]``. It is executed
    directly when ``HAL.auto >= 0`` and the model requested no user input;
    otherwise a new notebook cell is created with the requested input lines,
    the import header and the code.

    Returns:
        ``False`` after executing the code, ``True`` when a cell was created
        and the sequence should pause.
    """
    STATE = memory.session["STATE"]
    c, request_input = code(step["prompt"], import_variable=memory.session, _doc=step["_doc"])
    step["_code"] = c
    display.sequence(memory.session["sequence"])
    if HAL.auto >= 0 and (request_input is None):
        display.log(f"[HAL] Executing...", "Executing")
        try:
            run.execute(c, import_variable=memory.session)
            print("Execution Completed with SIGNAL: ", STATE.get("SIGNAL", ""))
        except Exception as err:
            # Was `type(e)`: `e` is undefined here, so the handler itself
            # raised NameError and the runtime error was never recorded.
            STATE["SIGNAL"] = f"Runtime Error: {type(err).__name__}: {str(err)}"
            print("Execution Error: ", str(err))
        display.log(f"[HAL] Execution completed with SIGNAL: {STATE['SIGNAL']}")
        display.sequence(memory.session["sequence"])
        return False
    display.new_cell((f"# [HAL] Requesting user input:\n{request_input}\n\n" if request_input is not None else "") + f"# [HAL] Imports:\n{utils.get_exec_import(memory.session)}\n\n# [HAL] Code:\n{c}")
    return True
174
+ handlers["code"] = code_handler
175
+
@@ -0,0 +1,61 @@
1
+ import yaml
2
+ import ipywidgets as widgets
3
+ from IPython.display import display, Markdown
4
+ from IPython.core.getipython import get_ipython
5
+
6
+ log_output = widgets.Output(layout={'height': '300px', 'overflow_y': 'auto'})
7
+ log_accordion = widgets.Accordion(children=[log_output], titles=["[HAL]"])
8
+ sequence_accordion = widgets.Accordion()
9
+
10
def get_markdown_output(content):
    """Return a new Output widget with ``content`` rendered into it as markdown."""
    container = widgets.Output()
    with container:
        rendered = Markdown(content)
        display(rendered)
    return container
15
+
16
def init():
    """Display the log accordion followed by the sequence accordion."""
    for widget in (log_accordion, sequence_accordion):
        display(widget)
19
+
20
def log(content, status="Idle"):
    """Set the status shown in the log accordion title and print ``content`` into the log."""
    title = f"Status [HAL] {status}"
    log_accordion.set_title(0, title)
    with log_output:
        print(content)
24
+
25
def show(content):
    """Display ``content`` as markdown between horizontal rules and return it unchanged."""
    framed = "".join(("---\n\n", content, "\n\n---\n\n"))
    display(Markdown(framed))
    return content
28
+
29
def new_cell(content):
    """Queue ``content`` as the next input cell of the running IPython shell."""
    shell = get_ipython()
    shell.set_next_input(content, replace=False)
31
+
32
def sequence(seq):
    """Rebuild the sequence accordion: one tab widget per step, one tab per key.

    The ``_type`` key is left out of the tabs and used in the accordion
    title. Keys containing "code" are shown as a Python block; ``_doc`` is
    shown as YAML.
    """
    sequence_accordion.children = []
    for index, step in enumerate(seq):
        tab = widgets.Tab()
        for key, value in step.items():
            if key == "_type":
                continue
            if key == "_doc":
                text = f"```yaml\n{yaml.dump(value)}\n```"
            elif "code" in key:
                text = f"```python\n{value}\n```"
            else:
                text = value
            tab.children = tab.children + (get_markdown_output(text),)
            tab.set_title(len(tab.children) - 1, key)
        # Open the second tab when there is one, otherwise the first.
        tab.selected_index = 1 if len(tab.children) > 1 else 0
        sequence_accordion.children = sequence_accordion.children + (tab,)
        sequence_accordion.set_title(index, f"sequence [{index}] {step.get('_type', '')}")
49
+
50
def docs(doc_texts):
    """Show markdown documents one at a time, selected with a slider.

    Args:
        doc_texts: List of markdown strings. For an empty list a short notice
            is shown instead of the slider.
    """
    n = len(doc_texts)
    if n == 0:
        # A slider with max=-1 is invalid and doc_texts[0] does not exist.
        display(Markdown("No documents found."))
        return
    out = widgets.Output()

    def on_slider_change(change):
        i = change['new']
        out.clear_output()
        with out:
            display(Markdown(doc_texts[i]))

    slider = widgets.IntSlider(value=0, min=0, max=n-1, step=1, description=f'Docs ({n}):', continuous_update=False)
    slider.observe(on_slider_change, names='value')
    # Render the first document right away.
    on_slider_change({'new': 0})
    display(slider, out)
@@ -0,0 +1,78 @@
1
+ import time, json, gzip, hashlib, os, datetime
2
+ import numpy as np
3
+ from .display import log
4
+ from .utils import add_embedding_cost, config
5
+
6
+ session = {
7
+ "name": "HAL",
8
+ "cost": 0.0,
9
+ "sequence": [],
10
+ "STATE": {}
11
+ }
12
+
13
client = None  # gemini client; `embed` reads this global (was misspelled `clent`)
14
+
15
+ data = {}
16
+
17
+ # helper functions
18
def cos_sim(v1, v2):
    """Return the cosine similarity of two vectors: dot product over the product of norms."""
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return np.dot(v1, v2) / norm_product
19
def sha256str(s):
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoding of ``s``."""
    return hashlib.sha256(s.encode('utf-8')).hexdigest()
23
def embed(content, task_type="retrieval_document"):
    """Embed ``content`` with the Gemini embedding model.

    The token count of the content is added to the session cost before the
    embedding request is made.

    Returns:
        The values of the first embedding, or ``None`` when any step fails.
        A failure is written to the log.
    """
    try:
        model = "gemini-embedding-001"
        add_embedding_cost(client.models.count_tokens(model=model, contents=content))
        return client.models.embed_content(model=model, contents=content, config={"task_type": task_type}).embeddings[0].values
    except Exception as err:
        # Stay best-effort, but leave a trace and let KeyboardInterrupt and
        # SystemExit through.
        log(f"[HAL] Warning: embedding failed ({type(err).__name__}: {err})")
        return None
30
+
31
+ # operations
32
def load():
    """Load the memory documents from the gzip JSON file named in the config.

    On any failure (missing config key, missing or unreadable file) memory
    starts empty and the reason is logged.

    Returns:
        The loaded document dict.
    """
    global data
    try:
        with gzip.open(config["MEMORY_DATA_PATH"], 'rt') as f:
            data = json.load(f)
        m_time_timestamp = os.path.getmtime(config["MEMORY_DATA_PATH"])
        m_time_datetime = datetime.datetime.fromtimestamp(m_time_timestamp)
        log(f"[HAL] Memory loaded: {len(data)} documents (version {m_time_datetime})")
    except Exception as err:
        # Starting empty is the intended fallback; the log keeps a trace of why.
        log(f"[HAL] Memory not loaded ({type(err).__name__}: {err}); starting empty.")
        data = {}
    return data
43
def save():
    """Write all memory documents as JSON to the gzip file named in the config."""
    target_path = config["MEMORY_DATA_PATH"]
    with gzip.open(target_path, 'wt') as handle:
        json.dump(data, handle)
46
+
47
def add(content, meta={}):
    """Embed ``content`` and store it with its metadata.

    The document id is the SHA-256 hex digest of the content, so adding the
    same content again replaces the stored entry. A ``"time"`` field with
    the current Unix time is always set. ``meta`` is not modified.

    Returns:
        The document id.
    """
    doc_id = sha256str(content)
    embedding = embed(content)
    # Build a new dict rather than writing "time" into the caller's (or the
    # shared default) meta dict.
    data[doc_id] = {"content": content, "embedding": embedding, **meta, "time": int(time.time())}
    return doc_id
54
def get(doc_id):
    """Return a copy of the stored document with its id under the key ``"id"``."""
    return {**data[doc_id], "id": doc_id}
59
def delete(doc_id):
    """Remove the document ``doc_id``; raises ``KeyError`` when it is absent."""
    data.pop(doc_id)
61
+
62
+ # return a list of (doc_id, score)
63
def search(q, maxn=5, cutoff_gradient=0.05, threshold=0.6, filter=None):
    """Find the documents most similar to ``q``.

    Args:
        q: Query text; embedded with task type ``retrieval_query``.
        maxn: Maximum number of results.
        cutoff_gradient: The result list is cut at the first gap between
            consecutive scores that is at least this large.
        threshold: Minimum cosine similarity for a document to be a hit.
        filter: Optional predicate on the stored document dict; documents
            for which it is falsy are skipped.

    Returns:
        A list of ``(doc_id, score)`` sorted by descending score; empty when
        the query could not be embedded.
    """
    q_embedding = embed(q, task_type="retrieval_query")
    if q_embedding is None:
        # embed() returns None on failure; nothing can be compared then.
        return []
    scores = []
    for doc_id, data_dict in data.items():
        if filter is not None and not filter(data_dict):
            continue
        embedding = data_dict.get('embedding')
        if embedding is None:
            # Document stored while embedding failed: not searchable.
            continue
        score = cos_sim(q_embedding, np.array(embedding))
        if score < threshold:
            continue
        scores.append((doc_id, score))
    scores.sort(key=lambda x: x[1], reverse=True)
    for i in range(min(maxn, len(scores) - 1)):
        if scores[i][1] - scores[i+1][1] >= cutoff_gradient:
            return scores[:(i+1)]
    return scores[:maxn]
@@ -0,0 +1,28 @@
1
+ import re
2
+ from . import memory
3
+ from .utils import get_exec_import
4
+
5
def execute(code, import_variable={ "name": "HAL" }):
    """Run ``code`` behind the configured import header.

    ``STATE["SIGNAL"]`` is reset to an empty string first. The code sees two
    names besides its imports: ``STATE`` (the session state dict) and
    ``INVOKE`` (runs another step or code segment with the same
    ``import_variable``).

    Raises:
        Any exception raised by the executed code.
    """
    STATE = memory.session["STATE"]
    STATE["SIGNAL"] = ""
    _code = get_exec_import(import_variable) + "\n\n" + code
    INVOKE = lambda name: invoke(name, import_variable=import_variable)
    # One namespace serves as both globals and locals. With two separate
    # dicts the code runs as if inside a class body, and functions defined
    # in it cannot see the imports or other top-level names.
    namespace = { "STATE": STATE, "INVOKE": INVOKE }
    exec(_code, namespace)
11
+
12
def invoke(name=None, import_variable={ "name": "HAL" }):
    """Run the code of a previous step or of a memorized code segment.

    Args:
        name: An int indexes the session sequence (``None`` means the last
            step). A string is searched among documents whose ``invoke``
            field is truthy, and the first Python code fence of the best hit
            is run.
        import_variable: Passed on to ``execute``.

    Nothing is executed when no code is found.
    """
    target = -1 if name is None else name
    snippet = ""
    if isinstance(target, int):
        snippet = memory.session["sequence"][target].get("_code", "")
    elif isinstance(target, str):
        matches = memory.search(target, filter=lambda d: "invoke" in d and d["invoke"])
        if not matches:
            return
        document = memory.get(matches[0][0])
        fenced = re.search(r"```python\n(.*?)\n```", document["content"], re.DOTALL)
        snippet = fenced.group(1) if fenced else ""
    if snippet == "":
        return
    execute(snippet, import_variable=import_variable)
@@ -0,0 +1,56 @@
1
+ from . import memory
2
+
3
+ config = {}
4
+
5
def docs2text(docs):
    """Render documents as numbered text sections followed by an end marker."""
    sections = [
        f"--- Document {index} ---\n\n{doc['content']}\n\n"
        for index, doc in enumerate(docs)
    ]
    sections.append("--- End of Documents ---")
    return "".join(sections)
11
+
12
def sequence2text(sequence):
    """Render the step history as text for a model prompt.

    Each step becomes a numbered section with one ``key: value`` line per
    entry. Keys starting with an underscore are internal and left out.

    Returns:
        The rendered text, ending with an end-of-steps marker.
    """
    parts = []
    for i, step in enumerate(sequence):
        parts.append(f"--- Step {i} ---\n\n")
        # startswith instead of k[0]: an empty-string key must not raise.
        parts.extend(f"{k}: {v}\n" for k, v in step.items() if not k.startswith("_"))
        parts.append("\n")
    parts.append("--- End of Steps ---")
    return "".join(parts)
22
+
23
def state_type2text(STATE):
    """List each STATE variable with the name of its type, one markdown bullet per line."""
    bullets = (f"- `{name}`({type(value).__name__})\n" for name, value in STATE.items())
    return "".join(bullets)
28
+
29
def evalStr(s, var):
    """Evaluate ``s`` as the body of a triple-quoted f-string with ``var`` as local names."""
    # NOTE(review): this runs eval() on the template text; it must only be
    # fed trusted configuration.
    template = f"f'''{s}'''"
    return eval(template, None, var)


def get_exec_import(var):
    """Return the configured ``EXEC_IMPORT`` header rendered with ``var``."""
    return evalStr(config["EXEC_IMPORT"], var)
32
+
33
+ prices = {
34
+ "gemini-embedding-001": 0.15/1e6,
35
+ "gemini-3.1-pro-preview": (2/1e6, 12/1e6),
36
+ "gemini-3-flash-preview": (0.5/1e6, 3/1e6)
37
+ }
38
+
39
def add_embedding_cost(res):
    """Add the embedding cost of a token-count result to the session and return it."""
    unit_price = prices["gemini-embedding-001"]
    cost = res.total_tokens * unit_price
    memory.session["cost"] += cost
    return cost
43
+
44
def add_generative_cost(res):
    """Add the cost of a generation response to the session and return it.

    Prices are looked up by ``res.model_version``; an unknown version prints
    a warning and is priced at zero. Output tokens are taken as total minus
    prompt tokens. A second warning is printed above 200k input tokens.
    """
    version = res.model_version
    usage = res.usage_metadata
    if version not in prices:
        print(f"[HAL] Warning: model version {version} not found in prices list. Cost estimation will be inaccurate.")
    rates = prices.get(version, (0, 0))
    input_token_count = usage.prompt_token_count
    output_token_count = usage.total_token_count - usage.prompt_token_count
    total_cost = rates[0] * input_token_count + rates[1] * output_token_count
    memory.session["cost"] += total_cost
    if input_token_count > 2e5:
        print(f"[HAL] Warning: input token count({input_token_count}) exceeds 200k tokens. Cost estimation will be inaccurate.")
    return total_cost
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Cleland Lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: clelandlab-HAL
3
+ Version: 0.0.0
4
+ Summary: Heuristic Autonomous Lab
5
+ Home-page: https://github.com/clelandlab/HAL
6
+ Author: Cleland Lab
7
+ Author-email: clelandlab@proton.me
8
+ Project-URL: Source, https://github.com/clelandlab/HAL
9
+ Project-URL: Tracker, https://github.com/clelandlab/HAL/issues
10
+ Keywords: LLM,AI,quantum,experiment,measurement,qubit,control
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Software Development :: Build Tools
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3 :: Only
22
+ Requires-Python: >=3.8, <4
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: google-genai
26
+ Requires-Dist: ipywidgets
27
+ Requires-Dist: ipython
28
+ Dynamic: author
29
+ Dynamic: author-email
30
+ Dynamic: classifier
31
+ Dynamic: description
32
+ Dynamic: description-content-type
33
+ Dynamic: home-page
34
+ Dynamic: keywords
35
+ Dynamic: license-file
36
+ Dynamic: project-url
37
+ Dynamic: requires-dist
38
+ Dynamic: requires-python
39
+ Dynamic: summary
40
+
41
+ # HAL
42
+
43
+ Heuristic Autonomous Lab
44
+
45
+ ## Config
46
+
47
+ Create a `config.json` with the following content:
48
+
49
+ ```json
50
+ {
51
+ "GEMINI_API_KEY": "your gemini API key",
52
+ "MEMORY_DATA_PATH": "/path/to/the/memory/data.gz",
53
+ "EXEC_IMPORT": "import time, os, sys, json, yaml, scipy\nimport numpy as np\nimport matplotlib.pyplot as plt\n"
54
+ }
55
+ ```
56
+
57
+ ## Get Started
58
+
59
+ ```python
60
+ import HAL
61
+ HAL.init() # initialization: loading memory, setting up display, etc.
62
+ # HAL.auto = 3 # auto-execution: HAL will automatically execute up to 3 steps
63
+
64
+ HAL.reset() # this resets the HAL session
65
+ HAL("Do something") # main interface: query HAL
66
+
67
+ HAL() # continue without user input
68
+ ```
69
+
70
+ ## API
71
+
72
+ ```python
73
+ # initialization
74
+ HAL.init("Name", _config=None)
75
+
76
+ # main interface
77
+ HAL(query=None)
78
+
79
+ # properties
80
+ HAL.auto = 0
81
+ HAL.session = {}
82
+
83
+ # session operations
84
+ HAL.reset()
85
+ HAL.save(path="session.json")
86
+ HAL.load(path="session.json")
87
+
88
+ # memory operations
89
+ HAL.search(query)
90
+ HAL.memorize(content, meta={ "source": HAL.name })
91
+
92
+ # low-level components
93
+ HAL.memory
94
+ HAL.display
95
+ # Agents
96
+ HAL.gather_document
97
+ HAL.sort
98
+ HAL.plan
99
+ HAL.answer
100
+ HAL.code
101
+ ```
@@ -0,0 +1,61 @@
1
+ # HAL
2
+
3
+ Heuristic Autonomous Lab
4
+
5
+ ## Config
6
+
7
+ Create a `config.json` with the following content:
8
+
9
+ ```json
10
+ {
11
+ "GEMINI_API_KEY": "your gemini API key",
12
+ "MEMORY_DATA_PATH": "/path/to/the/memory/data.gz",
13
+ "EXEC_IMPORT": "import time, os, sys, json, yaml, scipy\nimport numpy as np\nimport matplotlib.pyplot as plt\n"
14
+ }
15
+ ```
16
+
17
+ ## Get Started
18
+
19
+ ```python
20
+ import HAL
21
+ HAL.init() # initialization: loading memory, setting up display, etc.
22
+ # HAL.auto = 3 # auto-execution: HAL will automatically execute up to 3 steps
23
+
24
+ HAL.reset() # this resets the HAL session
25
+ HAL("Do something") # main interface: query HAL
26
+
27
+ HAL() # continue without user input
28
+ ```
29
+
30
+ ## API
31
+
32
+ ```python
33
+ # initialization
34
+ HAL.init("Name", _config=None)
35
+
36
+ # main interface
37
+ HAL(query=None)
38
+
39
+ # properties
40
+ HAL.auto = 0
41
+ HAL.session = {}
42
+
43
+ # session operations
44
+ HAL.reset()
45
+ HAL.save(path="session.json")
46
+ HAL.load(path="session.json")
47
+
48
+ # memory operations
49
+ HAL.search(query)
50
+ HAL.memorize(content, meta={ "source": HAL.name })
51
+
52
+ # low-level components
53
+ HAL.memory
54
+ HAL.display
55
+ # Agents
56
+ HAL.gather_document
57
+ HAL.sort
58
+ HAL.plan
59
+ HAL.answer
60
+ HAL.code
61
+ ```
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: clelandlab-HAL
3
+ Version: 0.0.0
4
+ Summary: Heuristic Autonomous Lab
5
+ Home-page: https://github.com/clelandlab/HAL
6
+ Author: Cleland Lab
7
+ Author-email: clelandlab@proton.me
8
+ Project-URL: Source, https://github.com/clelandlab/HAL
9
+ Project-URL: Tracker, https://github.com/clelandlab/HAL/issues
10
+ Keywords: LLM,AI,quantum,experiment,measurement,qubit,control
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Software Development :: Build Tools
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3 :: Only
22
+ Requires-Python: >=3.8, <4
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: google-genai
26
+ Requires-Dist: ipywidgets
27
+ Requires-Dist: ipython
28
+ Dynamic: author
29
+ Dynamic: author-email
30
+ Dynamic: classifier
31
+ Dynamic: description
32
+ Dynamic: description-content-type
33
+ Dynamic: home-page
34
+ Dynamic: keywords
35
+ Dynamic: license-file
36
+ Dynamic: project-url
37
+ Dynamic: requires-dist
38
+ Dynamic: requires-python
39
+ Dynamic: summary
40
+
41
+ # HAL
42
+
43
+ Heuristic Autonomous Lab
44
+
45
+ ## Config
46
+
47
+ Create a `config.json` with the following content:
48
+
49
+ ```json
50
+ {
51
+ "GEMINI_API_KEY": "your gemini API key",
52
+ "MEMORY_DATA_PATH": "/path/to/the/memory/data.gz",
53
+ "EXEC_IMPORT": "import time, os, sys, json, yaml, scipy\nimport numpy as np\nimport matplotlib.pyplot as plt\n"
54
+ }
55
+ ```
56
+
57
+ ## Get Started
58
+
59
+ ```python
60
+ import HAL
61
+ HAL.init() # initialization: loading memory, setting up display, etc.
62
+ # HAL.auto = 3 # auto-execution: HAL will automatically execute up to 3 steps
63
+
64
+ HAL.reset() # resets the HAL session
65
+ HAL("Do something") # main interface: query HAL
66
+
67
+ HAL() # continue without user input
68
+ ```
69
+
70
+ ## API
71
+
72
+ ```python
73
+ # initialization
74
+ HAL.init("Name", _config=None)
75
+
76
+ # main interface
77
+ HAL(query=None)
78
+
79
+ # properties
80
+ HAL.auto = 0
81
+ HAL.session = {}
82
+
83
+ # session operations
84
+ HAL.reset()
85
+ HAL.save(path="session.json")
86
+ HAL.load(path="session.json")
87
+
88
+ # memory operations
89
+ HAL.search(query)
90
+ HAL.memorize(content, meta={ "source": HAL.name })
91
+
92
+ # low-level components
93
+ HAL.memory
94
+ HAL.display
95
+ # Agents
96
+ HAL.gather_document
97
+ HAL.sort
98
+ HAL.plan
99
+ HAL.answer
100
+ HAL.code
101
+ ```
@@ -0,0 +1,19 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ HAL/HAL_answer.py
5
+ HAL/HAL_code.py
6
+ HAL/HAL_gather_document.py
7
+ HAL/HAL_plan.py
8
+ HAL/HAL_sort.py
9
+ HAL/VERSION
10
+ HAL/__init__.py
11
+ HAL/display.py
12
+ HAL/memory.py
13
+ HAL/run.py
14
+ HAL/utils.py
15
+ clelandlab_HAL.egg-info/PKG-INFO
16
+ clelandlab_HAL.egg-info/SOURCES.txt
17
+ clelandlab_HAL.egg-info/dependency_links.txt
18
+ clelandlab_HAL.egg-info/requires.txt
19
+ clelandlab_HAL.egg-info/top_level.txt
@@ -0,0 +1,3 @@
1
+ google-genai
2
+ ipywidgets
3
+ ipython
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,46 @@
1
+ from setuptools import setup, find_packages
2
+ import pathlib
3
+
4
+ here = pathlib.Path(__file__).parent.resolve()
5
def get_version(rel_path):
    """Return the stripped text of the file at *rel_path*.

    *rel_path* is resolved against ``here``, the directory that contains
    this setup script. The file is decoded as UTF-8 explicitly so the
    result does not depend on the build machine's locale, matching how
    the README is read for ``long_description``.
    """
    return (here / rel_path).read_text(encoding="utf-8").strip()
7
+
8
# Package metadata and build configuration for the ``clelandlab-HAL``
# distribution. The version comes from HAL/VERSION and the long
# description from README.md, both resolved relative to this script.
setup(
    name="clelandlab-HAL",
    version=get_version("HAL/VERSION"),
    description="Heuristic Autonomous Lab",
    long_description=(here / "README.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    url="https://github.com/clelandlab/HAL",
    author="Cleland Lab",
    author_email="clelandlab@proton.me",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        # The package is an LLM agent for quantum experiments (see the
        # keywords below), not a build tool, so it is filed under a
        # scientific topic.
        "Topic :: Scientific/Engineering :: Physics",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Programming Language :: Python :: 3 :: Only",
    ],
    keywords="LLM,AI,quantum,experiment,measurement,qubit,control",
    packages=find_packages(where="."),
    # NOTE(review): the google-genai dependency may require a newer
    # Python than 3.8 -- confirm and raise this floor (and drop the 3.8
    # classifier above) if so.
    python_requires=">=3.8, <4",
    install_requires=[
        "google-genai",
        "ipywidgets",
        "ipython",
    ],
    # Ship the VERSION file inside the installed HAL package.
    package_data={
        "HAL": ["VERSION"],
    },
    include_package_data=True,
    project_urls={
        "Source": "https://github.com/clelandlab/HAL",
        "Tracker": "https://github.com/clelandlab/HAL/issues",
    },
)